#ifndef TMVA_DNN_ARCHITECTURES_CUDNN
#define TMVA_DNN_ARCHITECTURES_CUDNN

#include "RConfigure.h"

#error This file can be compiled only when cudnn is available in ROOT
template<typename AFloat = Float_t>

   using Scalar_t       = AFloat;
 
#if (CUDNN_VERSION >= 8000)

   using ConvLayer_t             = CNN::TConvLayer<TCudnn<AFloat>>;
   using ConvDescriptors_t       = CNN::TCNNDescriptors<ConvLayer_t>;
   using ConvWorkspace_t         = CNN::TCNNWorkspace<ConvLayer_t>;
   using PoolingLayer_t          = CNN::TMaxPoolLayer<TCudnn<AFloat>>;
   using PoolingDescriptors_t    = CNN::TCNNDescriptors<PoolingLayer_t>;
   using PoolingWorkspace_t      = CNN::TCNNWorkspace<PoolingLayer_t>;

   using RNNLayer_t              = RNN::TBasicRNNLayer<TCudnn<AFloat>>;
   using RNNDescriptors_t        = RNN::TRNNDescriptors<TCudnn<AFloat>>;
   using RNNWorkspace_t          = RNN::TRNNWorkspace<TCudnn<AFloat>>;

   using LSTMLayer_t             = RNN::TBasicLSTMLayer<TCudnn<AFloat>>;

   using GRULayer_t              = RNN::TBasicGRULayer<TCudnn<AFloat>>;
 
   static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
      return Tensor_t( {n, c, h, w}, GetTensorLayout(), 0, 0);
   }
 
   static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
      return Tensor_t( buffer, {n, c, h, w}, GetTensorLayout(), 0, 0);
   }
 
   static Tensor_t CreateTensor(size_t n, size_t c, size_t w)
   {
      return Tensor_t({n, c, w}, GetTensorLayout(), 0, 0);
   }
 
   static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t w)
   {
      return Tensor_t(buffer, {n, c, w}, GetTensorLayout(), 0, 0);
   }
 
   static bool IsCudnn() { return true; }
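   // Usage sketch (not part of this header): creating tensors through the factory
   // functions above. It assumes ROOT was built with CUDA/cuDNN support; the names
   // below are illustrative only.
   //
   //    using Arch_t = TMVA::DNN::TCudnn<float>;
   //    auto t4 = Arch_t::CreateTensor(8, 3, 32, 32);   // { batch, channels, height, width }
   //    auto t3 = Arch_t::CreateTensor(8, 3, 32);       // three-dimensional variant
   //    bool usesCudnn = Arch_t::IsCudnn();             // true for this backend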
 
   static void CreateWeightTensors( std::vector<Matrix_t> & newWeights, const std::vector<Matrix_t> & weights) {
      size_t n = weights.size();
      for (size_t i = 0; i < n; ++i)
         newWeights.emplace_back( weights[i].GetShape(), weights[i].GetLayout(), 0, 0);
   }
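   // Usage sketch: allocate a weight vector with the same shapes and memory layout
   // as an existing one, e.g. before copying weights from another architecture.
   // The vector 'referenceWeights' is hypothetical.
   //
   //    std::vector<Arch_t::Matrix_t> newWeights;
   //    Arch_t::CreateWeightTensors(newWeights, referenceWeights);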
 
   static void InitializeBNormDescriptors(TDescriptors * & descriptors,
                                          BNormLayer_t *L = nullptr);

   static void InitializeConvDescriptors(TDescriptors * & descriptors,
                                         ConvLayer_t *L = nullptr);

   static void InitializePoolDescriptors(TDescriptors * & descriptors,
                                         PoolingLayer_t *L = nullptr);
 
   template<typename RNNLayer>

   static void InitializeActivationDescriptor(ActivationDescriptor_t & descriptors, EActivationFunction activFunc, double coef = 0.0);
 
   static void ReleaseConvDescriptors(TDescriptors * descriptors );
   static void ReleasePoolDescriptors(TDescriptors * descriptors );
   static void ReleaseRNNDescriptors(TDescriptors *descriptors);
   static void ReleaseBNormDescriptors(TDescriptors * descriptors );
   static void ReleaseDescriptor(EmptyDescriptor_t       & emptyDescr) {}
   static void ReleaseDescriptor(ActivationDescriptor_t  & activationDescr);
   static void ReleaseDescriptor(DropoutDescriptor_t     & dropoutDescr);
   static void ReleaseDescriptor(FilterDescriptor_t      & filterDescr);
   static void ReleaseDescriptor(PoolingDescriptor_t     & poolingDescr);
   static void ReleaseDescriptor(TensorDescriptor_t      & tensorDescr);
 
   static void InitializeConvWorkspace(TWorkspace * & workspace,
                                       const DNN::CNN::TConvParams & params,
                                       ConvLayer_t *L = nullptr);

   static void InitializePoolDropoutWorkspace(TWorkspace * & workspace,
                                       const DNN::CNN::TConvParams & params,
                                       PoolingLayer_t *L = nullptr);
 
   template<typename RNNLayer>

   static void FreeConvWorkspace(TWorkspace * workspace);
   static void FreePoolDropoutWorkspace(TWorkspace * workspace);
   static void FreeRNNWorkspace(TWorkspace *workspace);

   template <typename RNNLayer>
 
   static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights);
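   // Forward step of a dense (fully connected) layer. As in the other TMVA
   // architecture backends, this is expected to compute output = input * weights^T,
   // with one row per event in the batch.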
 
                        const Matrix_t & weights,

   static void ScaleAdd(Tensor_t & A, const Tensor_t & B,
                        Scalar_t alpha = 1.0,
                        Scalar_t beta = 1.0);

   static void Copy(Tensor_t & A, const Tensor_t & B);
 
   template<typename ATensor_t>
   static void CopyDiffArch(Tensor_t & A,

   template <typename ATensor_t>

   static void CopyDiffArch(Tensor_t A, const Tensor_t & B ) { Copy(A,B); }

   template<typename AMatrix_t>
   static void CopyDiffArch(std::vector<Tensor_t>  & A,
                            const std::vector<AMatrix_t> & B);
 
                                  Tensor_t & Y,  Tensor_t & dY,
                                  const AFloat alpha = 1,
                                  const AFloat beta = 1) {}
 
   static void ActivationFunctionForward(Tensor_t & X, EActivationFunction activFunct,
                          const double coef = 0.0, const AFloat alpha = 1,
                          const AFloat beta = 0);

   static void ActivationFunctionForward(Tensor_t &Y, const Tensor_t & X, EActivationFunction activFunct,
                                         const AFloat alpha = 1, const AFloat beta = 0);

   static void ActivationFunctionBackward(Tensor_t & dX, const Tensor_t & Y,
                                          const Tensor_t & dY,  const Tensor_t & X,
                                          const AFloat alpha = 1,
                                          const AFloat beta = 0);
 
   static void Relu(Tensor_t &) {}
   static void Sigmoid(Tensor_t &) {}
   static void Tanh(Tensor_t &) {}
   static void FastTanh(Tensor_t &) {}
   static void Gauss(Tensor_t &) {}

   static void FastTanhDerivative(Tensor_t &, const Tensor_t &) {}
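   // Note: these element-wise activation helpers are deliberately empty in the cuDNN
   // backend; activations are applied through ActivationFunctionForward /
   // ActivationFunctionBackward together with a cuDNN activation descriptor instead.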
 
                                    const Matrix_t &weights);

                                         const Matrix_t &output, const Matrix_t &weights);

                                const Matrix_t &weights);

                                     const Matrix_t &output, const Matrix_t &weights);

                                       const Matrix_t &weights);

                                            const Matrix_t &output, const Matrix_t &weights);
 
   static void DropoutForward(Tensor_t & A,
                              TWorkspace         * workspace,

   static void DropoutBackward(Tensor_t & A,
                               TWorkspace   * workspace);
 
   static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,

   static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta,
                                              const TensorDescriptor_t &);

   static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
                                      const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &);
 
   static Scalar_t L1Regularization(const Matrix_t &W)

      return TCuda<AFloat>::L1Regularization(mW);

      return TCuda<AFloat>::AddL1RegularizationGradients(mA, mW, weightDecay);

   static Scalar_t L2Regularization(const Matrix_t &W)

      return TCuda<AFloat>::L2Regularization(mW);
 
      return TCuda<AFloat>::AddL2RegularizationGradients(mA, mW, weightDecay);
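   // Usage sketch for the regularization wrappers, with hypothetical weight and
   // gradient tensors W and G of type Matrix_t:
   //
   //    Arch_t::Scalar_t l1 = Arch_t::L1Regularization(W);
   //    Arch_t::Scalar_t l2 = Arch_t::L2Regularization(W);
   //    Arch_t::AddL1RegularizationGradients(G, W, /*weightDecay=*/1e-4);
   //
   // Each wrapper views the tensor's device buffer as a matrix and delegates to the
   // corresponding TCuda<AFloat> implementation.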
 
   static void InitializeGauss(Matrix_t &A);
   static void InitializeUniform(Matrix_t &A);
   static void InitializeIdentity(Matrix_t &A);
   static void InitializeZero(Matrix_t &A);
   static void InitializeGlorotNormal(Matrix_t &A);
   static void InitializeGlorotUniform(Matrix_t &A);

   static TRandom &GetRandomGenerator();
   static void SetRandomSeed(size_t seed);
 
   static void Dropout(Tensor_t &A, Scalar_t p) {}

   static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);

   static void PrepareInternals(Tensor_t &) {}
 
   static void ConvLayerForward(Tensor_t &output,
                                const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases,
                                const DNN::CNN::TConvParams &params, EActivationFunction activFunc,
                                ConvWorkspace_t &workspace);
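   // Forward pass of a convolutional layer for one batch: convolution, bias addition
   // and the requested activation are executed through cuDNN, using the scratch
   // memory prepared in the ConvWorkspace_t by InitializeConvWorkspace above.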
 
                                 const ConvDescriptors_t &descriptors, ConvWorkspace_t &workspace, size_t ,
                                 size_t , size_t , size_t , size_t ,
                                 size_t , size_t , size_t ,
 
   static void Downsample(Tensor_t &A, Tensor_t & , const Tensor_t &C, const PoolingDescriptors_t &descriptors,
                          size_t fltWidth, size_t strideRows, size_t strideCols);
 
   static void Flatten(Tensor_t &A, const Tensor_t &B);

   static void Deflatten(Tensor_t &A, const Tensor_t &B);

   static void Rearrange(Tensor_t &out, const Tensor_t &in);
 
   static void RNNForward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &weights,
                          Tensor_t &y, Tensor_t &hy, Tensor_t &cy, const RNNDescriptors_t &descr,

   static void RNNBackward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &y, const Tensor_t &dy,
                    const Tensor_t &dhy, const Tensor_t &dcy, const Tensor_t &weights, Tensor_t &dx, Tensor_t &dhx,
                    Tensor_t &dcx, Tensor_t &dw, const RNNDescriptors_t &desc, RNNWorkspace_t &workspace);

                                           Matrix_t & , Matrix_t & ,
 
   static Matrix_t &LSTMLayerBackward(
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & )
 
   static Matrix_t &GRULayerBackward(
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & ,
      Matrix_t & , Matrix_t & , Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , const Matrix_t & ,
      const Matrix_t & , Matrix_t & , bool)
 
   static void Hadamard(Tensor_t &A, const Tensor_t &B)

      assert(A.GetSize() == B.GetSize());
      TCuda<AFloat>::Hadamard(tmpA, tmpB);
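   // Usage sketch: element-wise (Hadamard) product. Both tensors must hold the same
   // number of elements (see the assert above); the result is written into A,
   // mirroring the TCuda<AFloat>::Hadamard call that the wrapper makes.
   //
   //    Arch_t::Hadamard(A, B);   // A_i *= B_i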
 
   static Scalar_t Sum(const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.0);
 
   static void ConstAdd(Matrix_t &A, Scalar_t beta) {
      tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::ConstAdd(tmp, beta);

   static void ConstMult(Matrix_t &A, Scalar_t beta) {
      tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::ConstMult(tmp, beta);

      tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::ReciprocalElementWise(tmp);

      tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::SquareElementWise(tmp);

      tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::SqrtElementWise(tmp);
 
   static void AdamUpdate(Matrix_t & A, const Matrix_t & M, const Matrix_t & V, Scalar_t alpha, Scalar_t eps) {

      TCuda<AFloat>::AdamUpdateFirstMom(tmpA, tmpB, beta);

      TCuda<AFloat>::AdamUpdateSecondMom(tmpA, tmpB, beta);
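   // The helpers above delegate to the TCuda optimizer kernels. The intended update
   // is the standard Adam step (assuming the usual conventions):
   //    m     <- beta1 * m + (1 - beta1) * g           // first moment
   //    v     <- beta2 * v + (1 - beta2) * g^2         // second moment
   //    theta <- theta - alpha * m / (sqrt(v) + eps)
   // with AdamUpdate applying the last step from the accumulated moments M and V.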
 
   static void PrintTensor( const Tensor_t & A, const std::string name = "tensor", bool = true);

   static void SumRows(Matrix_t &B, const Matrix_t &A);
 
template <typename AFloat>
template <typename ATensor>

   if (B.GetLayout() == GetTensorLayout()) {
      if ( B.GetShape().size() == 4) {
         assert(B.GetShape().size() == 4);
         size_t firstSize = (A.GetLayout() == GetTensorLayout()) ? A.GetShape()[0] : A.GetShape().back();
 
template <typename AFloat>
template <typename AMatrix>

   if (B.GetLayout() == GetTensorLayout()  ) {

template <typename AFloat>
template <typename AMatrix_t>

                            const std::vector<AMatrix_t> &A)

   for (size_t i = 0; i < B.size(); ++i) {
 
template <typename AFloat>

   std::cout << name << "  size = " << A.GetSize() << " shape = { ";
   auto shape = A.GetShape();
   for (size_t k = 0; k < shape.size()-1; ++k)
      std::cout << shape[k] << " , ";
   std::cout << shape.back() << " } ";
   std::cout << " strides = { ";
   auto strides = A.GetStrides();
   for (size_t k = 0; k < strides.size()-1; ++k)
      std::cout << strides[k] << " , ";
   std::cout << strides.back() << " }\n ";
   if (A.GetShape().size() == 1 ) {
      size_t n = A.GetShape()[0];
      for (size_t j = 0; j < n; ++j) {
         std::cout << A(0,j) << " ";
      if (truncate && n < A.GetShape()[0]) std::cout << " ...... ";
      std::cout << " } " << std::endl;
   } else if (A.GetShape().size() == 2 ) {
      size_t n1 = A.GetShape()[0];
      size_t n2 = A.GetShape()[1];
      for (size_t i = 0; i < n1; ++i) {
         for (size_t j = 0; j < n2; ++j) {
            std::cout << A(i,j) << " ";
         if (truncate && n2 < A.GetShape()[1]) std::cout << " ...... ";
         std::cout << " } " << std::endl;
      if (truncate && n1 < A.GetShape()[0]) std::cout << " ...............\n";
   } else if (A.GetShape().size() == 3 ) {
      size_t n1 = A.GetFirstSize();
      size_t n2 = A.GetHSize();
      size_t n3 = A.GetWSize();
      for (size_t i = 0; i < n1; ++i) {
         for (size_t j = 0; j < n2; ++j) {
            for (size_t k = 0; k < n3; ++k) {
               std::cout << A(i,j,k) << " ";
            if (truncate && n3 < A.GetWSize()) std::cout << " ...... ";
            std::cout << " } " << std::endl;
         if (truncate && n2 < A.GetHSize()) std::cout << ".................\n";
         std::cout << " } " << std::endl;
      if (truncate && n1 < A.GetFirstSize()) std::cout << "...................\n";
   } else if (A.GetShape().size() == 4 ) {
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         for (size_t j = 0; j < A.GetShape()[1]; ++j) {
            for (size_t k = 0; k < A.GetShape()[2]; ++k) {
               size_t n = A.GetShape()[3];
               for (size_t l = 0; l < n; ++l) {
                  std::cout << A(i,j,k,l) << " ";
               if (truncate && n < A.GetShape()[3]) std::cout << " ...... ";
               std::cout << " } " << std::endl;
            std::cout << " } " << std::endl;
         std::cout << " } " << std::endl;
      for (size_t l = 0; l < A.GetSize(); ++l) {
         std::cout << A.GetData()[l] << " ";
 
template <typename AFloat>

   std::cout << "Descriptor for 4d tensor of shape  { " << n << " , " << c << " , " << h << " , " << w << " }"
             << " and strides { " << s1 << " , " << s2 << " , " << s3 << " , " << s4 << " }" << std::endl;
 
template <typename AFloat>

   std::vector<int> dims(ndim);
   std::vector<int> strides(ndim);

   std::cout << "Descriptor for Nd tensor of dim = " << n << " shape  { ";
   for (auto d : dims)
      std::cout << d << " , ";
   std::cout << "} and strides { ";
   for (auto s : strides)
      std::cout << s << " , ";
   std::cout << " }" << std::endl;
 
template <typename AFloat>

template <typename AFloat>

template <typename AFloat>

template <typename AFloat>
 