#ifndef TMVA_DNN_ARCHITECTURES_CUDNN
#define TMVA_DNN_ARCHITECTURES_CUDNN

#include "RConfigure.h" // for the definition of R__HAS_CUDNN

#ifndef R__HAS_CUDNN
#error This file can be compiled only when cudnn is available in ROOT
#else
#include "TMatrixT.h"

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/BatchNormLayer.h"
#include "TMVA/DNN/CNN/ConvLayer.h"
#include "TMVA/DNN/CNN/MaxPoolLayer.h"
#include "TMVA/DNN/RNN/RNNLayer.h"
#include "TMVA/DNN/RNN/LSTMLayer.h"
#include "TMVA/DNN/RNN/GRULayer.h"

#include "cudnn.h"
#include "Cuda/CudaBuffers.h"
#include "Cuda/CudaTensor.h"
#include "TMVA/DNN/Architectures/Cuda.h"

#include <cassert>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class TRandom;

namespace TMVA {
namespace DNN {

struct TCudnnEmptyDescriptor {};

/** The TCudnn architecture class.
 *
 * Low-level interface class for CUDA computing architectures using the cuDNN
 * library as backend. It contains the scalar, matrix and tensor types used for
 * the architecture, and the remaining functions of the low-level interface as
 * static members.
 */
template <typename AFloat = Float_t>
class TCudnn
{
private:
   static TRandom *fgRandomGen;

public:
   using Scalar_t       = AFloat;
   using Matrix_t       = TCudaTensor<AFloat>;
   using Tensor_t       = TCudaTensor<AFloat>;
   using DeviceBuffer_t = TCudaDeviceBuffer<AFloat>;
   using HostBuffer_t   = TCudaHostBuffer<AFloat>;

   // The descriptor types used in the cuDNN API calls
   using ActivationDescriptor_t  = cudnnActivationDescriptor_t;
   using ConvolutionDescriptor_t = cudnnConvolutionDescriptor_t;
   using DropoutDescriptor_t     = cudnnDropoutDescriptor_t;
   using FilterDescriptor_t      = cudnnFilterDescriptor_t;
   using PoolingDescriptor_t     = cudnnPoolingDescriptor_t;

   // The cuDNN convolution algorithm and data types
   using AlgorithmForward_t  = cudnnConvolutionFwdAlgo_t;
   using AlgorithmBackward_t = cudnnConvolutionBwdDataAlgo_t;
   using AlgorithmHelper_t   = cudnnConvolutionBwdFilterAlgo_t;
   using AlgorithmDataType_t = cudnnDataType_t;
   using ReduceTensorDescriptor_t = cudnnReduceTensorDescriptor_t;
   using TensorDescriptor_t    = cudnnTensorDescriptor_t;
   using RecurrentDescriptor_t = cudnnRNNDescriptor_t;

   using EmptyDescriptor_t = TCudnnEmptyDescriptor;

   // The layer, descriptor and workspace types of the deep-learning module
   using BNormLayer_t         = TBatchNormLayer<TCudnn<AFloat>>;
   using BNormDescriptors_t   = TDNNGenDescriptors<BNormLayer_t>;
   using ConvLayer_t          = CNN::TConvLayer<TCudnn<AFloat>>;
   using ConvDescriptors_t    = CNN::TCNNDescriptors<ConvLayer_t>;
   using ConvWorkspace_t      = CNN::TCNNWorkspace<ConvLayer_t>;
   using PoolingLayer_t       = CNN::TMaxPoolLayer<TCudnn<AFloat>>;
   using PoolingDescriptors_t = CNN::TCNNDescriptors<PoolingLayer_t>;
   using PoolingWorkspace_t   = CNN::TCNNWorkspace<PoolingLayer_t>;
   using RNNLayer_t           = RNN::TBasicRNNLayer<TCudnn<AFloat>>;
   using RNNDescriptors_t     = RNN::TRNNDescriptors<TCudnn<AFloat>>;
   using RNNWorkspace_t       = RNN::TRNNWorkspace<TCudnn<AFloat>>;
   using LSTMLayer_t          = RNN::TBasicLSTMLayer<TCudnn<AFloat>>;
   using GRULayer_t           = RNN::TBasicGRULayer<TCudnn<AFloat>>;
   // Options for the cuDNN convolution algorithms. A value of -1 leaves the
   // choice to the cuDNN heuristics (the default behaviour).
   struct CNNOptions {
      static int ConvFwdAlgorithm;
      static int ConvBwdDataAlgorithm;
      static int ConvBwdFilterAlgorithm;
      // Maximum workspace size in bytes allowed for the convolution algorithms (-1 = no limit)
      static Long_t ConvMaxWorkspaceSize;
   };
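   /* Illustrative sketch (not part of the original header): how a user could
    * override the algorithm choices before building a network. The enumerators
    * come from cudnn.h; the sizes chosen here are arbitrary example values.
    * \code
    * using Arch_t = TMVA::DNN::TCudnn<float>;
    * Arch_t::CNNOptions::ConvFwdAlgorithm     = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
    * Arch_t::CNNOptions::ConvMaxWorkspaceSize = 256 * 1024 * 1024; // cap the workspace at 256 MB
    * \endcode
    */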
   // the cuDNN backend uses a row-major (NCHW) tensor layout
   static Experimental::MemoryLayout GetTensorLayout() { return Experimental::MemoryLayout::RowMajor; }

   static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
   {
      return Tensor_t({n, c, h, w}, GetTensorLayout(), 0, 0);
   }
   static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
   {
      return Tensor_t(buffer, {n, c, h, w}, GetTensorLayout(), 0, 0);
   }
   static Tensor_t CreateTensor(size_t n, size_t c, size_t w)
   {
      return Tensor_t({n, c, w}, GetTensorLayout(), 0, 0);
   }
   static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t w)
   {
      return Tensor_t(buffer, {n, c, w}, GetTensorLayout(), 0, 0);
   }
   static bool IsCudnn() { return true; }
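   /* Illustrative sketch: creating a 4d (NCHW) tensor, and one that wraps an
    * existing device buffer without copying. Shapes are arbitrary example values.
    * \code
    * auto t = TCudnn<float>::CreateTensor(32, 3, 28, 28); // batch=32, 3 channels, 28x28 images
    * TCudnn<float>::DeviceBuffer_t buf(32 * 3 * 28 * 28); // hypothetical device buffer of matching size
    * auto tv = TCudnn<float>::CreateTensor(buf, 32, 3, 28, 28); // view on buf, no copy
    * \endcode
    */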
   static void CreateWeightTensors(std::vector<Matrix_t> &newWeights, const std::vector<Matrix_t> &weights)
   {
      if (!newWeights.empty()) newWeights.clear();
      size_t n = weights.size();
      for (size_t i = 0; i < n; ++i)
         newWeights.emplace_back(weights[i].GetShape(), weights[i].GetLayout(), 0, 0);
   }
   static void InitializeBNormDescriptors(TDescriptors *&descriptors,
                                          BNormLayer_t *L = nullptr);

   static void InitializeConvDescriptors(TDescriptors *&descriptors,
                                         ConvLayer_t *L = nullptr);

   static void InitializePoolDescriptors(TDescriptors *&descriptors,
                                         PoolingLayer_t *L = nullptr);
   static void InitializeRNNDescriptors(TDescriptors *&descriptors, RNNLayer_t *layer)
   {
      InitializeRecurrentDescriptors<RNNLayer_t>(descriptors, layer);
   }
   static void InitializeLSTMDescriptors(TDescriptors *&descriptors, LSTMLayer_t *layer)
   {
      InitializeRecurrentDescriptors<LSTMLayer_t>(descriptors, layer);
   }
   static void InitializeGRUDescriptors(TDescriptors *&descriptors, GRULayer_t *layer)
   {
      InitializeRecurrentDescriptors<GRULayer_t>(descriptors, layer);
   }
   template <typename RNNLayer>
   static void InitializeRecurrentDescriptors(TDescriptors *&descriptors, RNNLayer *L);
   static void InitializeActivationDescriptor(ActivationDescriptor_t &descriptors,
                                              EActivationFunction activFunc, double coef = 0.0);
   static void ReleaseConvDescriptors(TDescriptors *descriptors);
   static void ReleasePoolDescriptors(TDescriptors *descriptors);
   static void ReleaseRNNDescriptors(TDescriptors *descriptors);
   static void ReleaseBNormDescriptors(TDescriptors *descriptors);
   static void ReleaseDescriptor(EmptyDescriptor_t &emptyDescr) {}
   static void ReleaseDescriptor(ActivationDescriptor_t &activationDescr);
   static void ReleaseDescriptor(ConvolutionDescriptor_t &convolutionDescr);
   static void ReleaseDescriptor(DropoutDescriptor_t &dropoutDescr);
   static void ReleaseDescriptor(FilterDescriptor_t &filterDescr);
   static void ReleaseDescriptor(PoolingDescriptor_t &poolingDescr);
   static void ReleaseDescriptor(TensorDescriptor_t &tensorDescr);
   static void InitializeConvWorkspace(TWorkspace *&workspace,
                                       TDescriptors *&descriptors,
                                       const DNN::CNN::TConvParams &params,
                                       ConvLayer_t *L = nullptr);
   static void InitializePoolDropoutWorkspace(TWorkspace *&workspace,
                                              TDescriptors *&descriptors,
                                              const DNN::CNN::TConvParams &params,
                                              PoolingLayer_t *L = nullptr);
   static void InitializeRNNWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, RNNLayer_t *layer)
   {
      InitializeRecurrentWorkspace<RNNLayer_t>(workspace, descriptors, layer);
   }
   static void InitializeLSTMWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, LSTMLayer_t *layer)
   {
      InitializeRecurrentWorkspace<LSTMLayer_t>(workspace, descriptors, layer);
   }
   static void InitializeGRUWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, GRULayer_t *layer)
   {
      InitializeRecurrentWorkspace<GRULayer_t>(workspace, descriptors, layer);
   }
   template <typename RNNLayer>
   static void InitializeRecurrentWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors,
                                            RNNLayer *layer);
   static void FreeConvWorkspace(TWorkspace *workspace);
   static void FreePoolDropoutWorkspace(TWorkspace *workspace);
   static void FreeRNNWorkspace(TWorkspace *workspace);
   static void InitializeRNNTensors(RNNLayer_t *layer) { InitializeRecurrentTensors<RNNLayer_t>(layer); }
   static void InitializeLSTMTensors(LSTMLayer_t *layer) { InitializeRecurrentTensors<LSTMLayer_t>(layer); }
   static void InitializeGRUTensors(GRULayer_t *layer) { InitializeRecurrentTensors<GRULayer_t>(layer); }
   template <typename RNNLayer>
   static void InitializeRecurrentTensors(RNNLayer *layer);
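   /* Typical descriptor/workspace life cycle for a layer, as driven by the deep
    * learning module (illustrative sketch; 'convLayer' is a hypothetical
    * CNN::TConvLayer<TCudnn<float>>* and 'params' a hypothetical TConvParams
    * instance owned by the network):
    * \code
    * TDescriptors *descr  = nullptr;
    * TWorkspace   *wspace = nullptr;
    * TCudnn<float>::InitializeConvDescriptors(descr, convLayer);
    * TCudnn<float>::InitializeConvWorkspace(wspace, descr, params, convLayer);
    * // ... forward / backward passes ...
    * TCudnn<float>::FreeConvWorkspace(wspace);
    * TCudnn<float>::ReleaseConvDescriptors(descr);
    * \endcode
    */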
   /** Matrix-multiply \p input with the transpose of \p weights and write the
    *  result into \p output. */
   static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights);

   /** Add the vector \p biases row-wise to the matrix \p output. */
   static void AddRowWise(Tensor_t &output, const Matrix_t &biases);
   /** Perform the complete backward propagation step: compute the gradients with
    *  respect to the activations, weights and biases. */
   static void Backward(Tensor_t &activationGradientsBackward,
                        Matrix_t &weightGradients,
                        Matrix_t &biasGradients,
                        Tensor_t &df,
                        const Tensor_t &activationGradients,
                        const Matrix_t &weights,
                        const Tensor_t &activationBackward);
   /** In-place scaled addition: A = alpha * B + beta * A. */
   static void ScaleAdd(Tensor_t &A, const Tensor_t &B,
                        Scalar_t alpha = 1.0,
                        Scalar_t beta = 1.0);

   /** Deep copy from B to A. */
   static void Copy(Tensor_t &A, const Tensor_t &B);
   // copy from a tensor of a different architecture
   template <typename ATensor_t>
   static void CopyDiffArch(Tensor_t &A, const ATensor_t &B);

   template <typename ATensor_t>
   static void CopyWeightsDiffArch(Tensor_t &A, const ATensor_t &B);
   // copy from a tensor of the same architecture
   static void CopyDiffArch(Tensor_t A, const Tensor_t &B) { Copy(A, B); }
   template <typename AMatrix_t>
   static void CopyDiffArch(std::vector<Tensor_t> &A,
                            const std::vector<AMatrix_t> &B);
   static void Identity(Tensor_t &X) {}
   static void IdentityDerivative(Tensor_t &dX, Tensor_t &X,
                                  Tensor_t &Y, Tensor_t &dY,
                                  ActivationDescriptor_t activationDescr,
                                  const AFloat alpha = 1,
                                  const AFloat beta = 1) {}
   /** Apply the given activation function in place to each value in the tensor X. */
   static void ActivationFunction(Tensor_t &X, EActivationFunction activFunct,
                                  const ActivationDescriptor_t activationDescr,
                                  const double coef = 0.0, const AFloat alpha = 1,
                                  const AFloat beta = 0);
   /** Apply the given activation function to X, writing the result into Y. */
   static void ActivationFunctionForward(Tensor_t &Y, const Tensor_t &X,
                                         EActivationFunction activFunct,
                                         const ActivationDescriptor_t activationDescr,
                                         const double coef = 0.0,
                                         const AFloat alpha = 1,
                                         const AFloat beta = 0);
   /** Compute the gradient of the activation function, writing it into dX. */
   static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y,
                                          const Tensor_t &dY, const Tensor_t &X,
                                          EActivationFunction activFunct,
                                          const ActivationDescriptor_t activationDescr,
                                          const AFloat alpha = 1,
                                          const AFloat beta = 0);
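   /* Illustrative sketch of the activation workflow: a descriptor is created
    * once per layer and passed to the forward/backward calls (X, Y, dX, dY are
    * hypothetical tensors of matching shape):
    * \code
    * TCudnn<float>::ActivationDescriptor_t actDescr;
    * TCudnn<float>::InitializeActivationDescriptor(actDescr, EActivationFunction::kRelu);
    * TCudnn<float>::ActivationFunctionForward(Y, X, EActivationFunction::kRelu, actDescr);
    * TCudnn<float>::ActivationFunctionBackward(dX, Y, dY, X, EActivationFunction::kRelu, actDescr);
    * TCudnn<float>::ReleaseDescriptor(actDescr);
    * \endcode
    */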
   /** Dummy element-wise activations: in the cuDNN backend the activations are
    *  applied through ActivationFunctionForward/Backward using the activation
    *  descriptors, so these versions are no-ops. */
   static void Relu(Tensor_t &) {}
   static void Sigmoid(Tensor_t &) {}
   static void Tanh(Tensor_t &) {}
   static void FastTanh(Tensor_t &) {}
   static void SymmetricRelu(Tensor_t &) {}
   static void SoftSign(Tensor_t &) {}
   static void Gauss(Tensor_t &) {}
   static void IdentityDerivative(Tensor_t &, const Tensor_t &) {}
   static void ReluDerivative(Tensor_t &, const Tensor_t &) {}
   static void SigmoidDerivative(Tensor_t &, const Tensor_t &) {}
   static void TanhDerivative(Tensor_t &, const Tensor_t &) {}
   static void FastTanhDerivative(Tensor_t &, const Tensor_t &) {}
   static void SymmetricReluDerivative(Tensor_t &, const Tensor_t &) {}
   static void SoftSignDerivative(Tensor_t &, const Tensor_t &) {}
   static void GaussDerivative(Tensor_t &, const Tensor_t &) {}
   /** Mean-squared-error loss and its gradient with respect to the output. */
   static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output,
                                    const Matrix_t &weights);
   static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
                                         const Matrix_t &output, const Matrix_t &weights);
   /** Cross-entropy loss and its gradient; the sigmoid transformation is applied
    *  implicitly, so \p output should hold the linear activations. */
   static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output,
                                const Matrix_t &weights);
   static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
                                     const Matrix_t &output, const Matrix_t &weights);
   /** Softmax cross-entropy loss for multi-class classification and its gradient. */
   static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output,
                                       const Matrix_t &weights);
   static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
                                            const Matrix_t &output, const Matrix_t &weights);
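   /* Illustrative sketch: evaluating a loss and its gradient. Y holds the
    * targets, output the network predictions and weights the per-event weights
    * (all hypothetical matrices of matching dimensions):
    * \code
    * auto loss = TCudnn<float>::MeanSquaredError(Y, output, weights);
    * TCudnn<float>::MeanSquaredErrorGradients(dY, Y, output, weights); // dY <- dL/d(output)
    * \endcode
    */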
   // output functions applied to the predictions
   static void Sigmoid(Matrix_t &YHat, const Matrix_t &);
   static void Softmax(Matrix_t &YHat, const Matrix_t &);
   static void DropoutForward(Tensor_t &A,
                              TDescriptors *descriptors,
                              TWorkspace *workspace,
                              Scalar_t p);
   static void DropoutBackward(Tensor_t &A,
                               TDescriptors *descriptors,
                               TWorkspace *workspace);
   static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y,
                                             Matrix_t &gamma, Matrix_t &beta,
                                             Matrix_t &mean, Matrix_t &, Matrix_t &iVariance,
                                             Matrix_t &runningMeans, Matrix_t &runningVars,
                                             Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon,
                                             const TensorDescriptor_t &bnParDescriptor);
   static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta,
                                              Tensor_t &y, const Matrix_t &runningMeans,
                                              const Matrix_t &runningVars, Scalar_t epsilon,
                                              const TensorDescriptor_t &);
   static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
                                      Matrix_t &gamma, // Matrix_t &beta (not needed)
                                      Matrix_t &dgamma, Matrix_t &dbeta,
                                      const Matrix_t &mean, const Matrix_t &variance,
                                      const Matrix_t &iVariance, Scalar_t epsilon,
                                      const TensorDescriptor_t &);
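   /* For reference: batch normalization computes, per normalization slice along
    * the chosen axis,
    *
    *    y = gamma * (x - mean) / sqrt(variance + epsilon) + beta
    *
    * during training, with the running statistics updated with the momentum
    * parameter, following a convention like
    *
    *    runningMean = momentum * runningMean + (1 - momentum) * batchMean,
    *
    * while inference uses the stored runningMeans / runningVars instead of the
    * batch statistics. In this backend the computation is delegated to the
    * cudnnBatchNormalization* routines in the implementation file.
    */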
   static Scalar_t L1Regularization(const Matrix_t &W)
   {
      TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
      return TCuda<AFloat>::L1Regularization(mW);
   }
   static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
   {
      TCudaMatrix<AFloat> mA(A.GetDeviceBuffer(), A.GetSize(), 1);
      TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
      TCuda<AFloat>::AddL1RegularizationGradients(mA, mW, weightDecay);
   }
   static Scalar_t L2Regularization(const Matrix_t &W)
   {
      TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
      return TCuda<AFloat>::L2Regularization(mW);
   }
   static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
   {
      TCudaMatrix<AFloat> mA(A.GetDeviceBuffer(), A.GetSize(), 1);
      TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
      TCuda<AFloat>::AddL2RegularizationGradients(mA, mW, weightDecay);
   }
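   /* The pattern used above (and in several helpers below) reinterprets a
    * tensor as a TCudaMatrix without copying, by wrapping its device buffer,
    * so that the element-wise kernels of the plain CUDA backend (TCuda) can be
    * reused:
    * \code
    * TCudaMatrix<AFloat> view(W.GetDeviceBuffer(), W.GetSize(), 1); // W viewed as a column vector
    * \endcode
    */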
   static void InitializeGauss(Matrix_t &A);
   static void InitializeUniform(Matrix_t &A);
   static void InitializeIdentity(Matrix_t &A);
   static void InitializeZero(Matrix_t &A);
   static void InitializeGlorotNormal(Matrix_t &A);
   static void InitializeGlorotUniform(Matrix_t &A);
   // return the random generator used for the weight initializations
   static TRandom &GetRandomGenerator();
   // set the seed of the random generator used for the initializations
   static void SetRandomSeed(size_t seed);

   /** Dropout is handled by DropoutForward/Backward; this version is a no-op. */
   static void Dropout(Tensor_t &A, Scalar_t p) {}
   /** Add the biases to the convolution output. */
   static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);

   /** Nothing to prepare internally for the cuDNN convolutions. */
   static void PrepareInternals(Tensor_t &) {}
   static void ConvLayerForward(Tensor_t &output,
                                Tensor_t &inputActivationFunc, // output of the convolution before the activation
                                const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases,
                                const DNN::CNN::TConvParams &params, EActivationFunction activFunc,
                                Tensor_t &, const ConvDescriptors_t &descriptors,
                                ConvWorkspace_t &workspace);
   static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
                                 Matrix_t &biasGradients, Tensor_t &inputActivation, Tensor_t &activationGradients,
                                 const Matrix_t &weights, const Tensor_t &activationBackward,
                                 const Tensor_t &outputTensor, EActivationFunction activFunc,
                                 const ConvDescriptors_t &descriptors, ConvWorkspace_t &workspace,
                                 size_t /*batchSize*/, size_t /*inputHeight*/, size_t /*inputWidth*/,
                                 size_t /*depth*/, size_t /*height*/, size_t /*width*/,
                                 size_t /*filterDepth*/, size_t /*filterHeight*/, size_t /*filterWidth*/,
                                 size_t /*nLocalViews*/);
   static void Downsample(Tensor_t &A, Tensor_t &, const Tensor_t &C,
                          const PoolingDescriptors_t &descriptors,
                          PoolingWorkspace_t &workspace,
                          size_t imgHeight, size_t imgWidth,
                          size_t fltHeight, size_t fltWidth,
                          size_t strideRows, size_t strideCols);
   static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward,
                                    const Tensor_t &activationGradients,
                                    const Tensor_t &, const Tensor_t &inputActivation,
                                    const Tensor_t &outputTensor,
                                    const PoolingDescriptors_t &descriptors,
                                    PoolingWorkspace_t &workspace,
                                    size_t imgHeight, size_t imgWidth,
                                    size_t fltHeight, size_t fltWidth,
                                    size_t strideRows, size_t strideCols,
                                    size_t nLocalViews);
   /** Flatten the tensor B into the matrix-like tensor A. */
   static void Flatten(Tensor_t &A, const Tensor_t &B);

   /** Transform each row of B back into a matrix in A, inverting Flatten. */
   static void Deflatten(Tensor_t &A, const Tensor_t &B);

   /** Rearrange data in time: fill the B x T x D tensor \p out from the
    *  T x B x D tensor \p in. */
   static void Rearrange(Tensor_t &out, const Tensor_t &in);
   static void RNNForward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &weights,
                          Tensor_t &y, Tensor_t &hy, Tensor_t &cy, const RNNDescriptors_t &descr,
                          RNNWorkspace_t &workspace, bool isTraining);
   static void RNNBackward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &y,
                           const Tensor_t &dy, const Tensor_t &dhy, const Tensor_t &dcy, const Tensor_t &weights,
                           Tensor_t &dx, Tensor_t &dhx, Tensor_t &dcx, Tensor_t &dw,
                           const RNNDescriptors_t &desc, RNNWorkspace_t &workspace);
   /** The per-step recurrent backward passes are implemented through RNNBackward
    *  using the cuDNN RNN API, so this version is a trivial pass-through. */
   static Matrix_t &RecurrentLayerBackward(Matrix_t &state_gradients_backward,
                                           Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &,
                                           const Matrix_t &, const Matrix_t &, const Matrix_t &,
                                           const Matrix_t &, Matrix_t &)
   {
      return state_gradients_backward;
   }
   static Matrix_t &LSTMLayerBackward(
      Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &,
      Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &,
      Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &,
      Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &,
      Matrix_t &, Matrix_t &, Matrix_t &)
   {
      return state_gradients_backward;
   }
   static Matrix_t &GRULayerBackward(
      Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &,
      Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &,
      Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &,
      const Matrix_t &, const Matrix_t &, const Matrix_t &,
      Matrix_t &, bool)
   {
      return state_gradients_backward;
   }
   /** In-place Hadamard (element-wise) product of A and B, written into A. */
   static void Hadamard(Tensor_t &A, const Tensor_t &B)
   {
      TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), 1, A.GetSize());
      TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), 1, B.GetSize());
      assert(A.GetSize() == B.GetSize());
      TCuda<AFloat>::Hadamard(tmpA, tmpB);
   }
   /** Compute the sum of all elements in A. */
   static Scalar_t Sum(const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.0);
   /** Add the constant beta to all the elements of matrix A and write the result into A. */
   static void ConstAdd(Matrix_t &A, Scalar_t beta)
   {
      TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::ConstAdd(tmp, beta);
   }
   /** Multiply the constant beta with all the elements of matrix A and write the result into A. */
   static void ConstMult(Matrix_t &A, Scalar_t beta)
   {
      TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::ConstMult(tmp, beta);
   }
   /** Take the reciprocal of each element of the matrix A and write the result into A. */
   static void ReciprocalElementWise(Matrix_t &A)
   {
      TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::ReciprocalElementWise(tmp);
   }
   /** Square each element of the matrix A and write the result into A. */
   static void SquareElementWise(Matrix_t &A)
   {
      TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::SquareElementWise(tmp);
   }
   /** Take the square root of each element of the matrix A and write the result into A. */
   static void SqrtElementWise(Matrix_t &A)
   {
      TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
      TCuda<AFloat>::SqrtElementWise(tmp);
   }
   // optimizer helper functions
   static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
   {
      TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(), 1);
      TCudaMatrix<AFloat> tmpM(M.GetDeviceBuffer(), M.GetSize(), 1);
      TCudaMatrix<AFloat> tmpV(V.GetDeviceBuffer(), V.GetSize(), 1);
      TCuda<AFloat>::AdamUpdate(tmpA, tmpM, tmpV, alpha, eps);
   }
   static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
   {
      TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(), 1);
      TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), B.GetSize(), 1);
      TCuda<AFloat>::AdamUpdateFirstMom(tmpA, tmpB, beta);
   }
   static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
   {
      TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(), 1);
      TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), B.GetSize(), 1);
      TCuda<AFloat>::AdamUpdateSecondMom(tmpA, tmpB, beta);
   }
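   /* For reference, the three Adam helpers above implement, element-wise,
    *
    *    M <- beta1 * M + (1 - beta1) * dW       (AdamUpdateFirstMom,  beta = beta1)
    *    V <- beta2 * V + (1 - beta2) * dW^2     (AdamUpdateSecondMom, beta = beta2)
    *    A <- A - alpha * M / (sqrt(V) + eps)    (AdamUpdate)
    *
    * matching the plain CUDA backend kernels they delegate to.
    */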
   // tensor printing utilities
   static void PrintTensor(const Tensor_t &A, const std::string name = "tensor", bool truncate = false);
   static void PrintTensor4dDescriptor(TensorDescriptor_t descriptor);
   static void PrintTensorNdDescriptor(TensorDescriptor_t descriptor, int n = 10);
   /** Sum rows of the (m x n) matrix A and write the results into the first
    *  m elements of B. */
   static void SumRows(Matrix_t &B, const Matrix_t &A);

}; // class TCudnn
//____________________________________________________________________________
template <typename AFloat>
template <typename ATensor>
void TCudnn<AFloat>::CopyDiffArch(TCudaTensor<AFloat> &B, const ATensor &A)
{
   // copy a tensor from a different architecture into a cuDNN tensor
   if (B.GetLayout() == GetTensorLayout()) {
      if (B.GetShape().size() == 4) {
         assert(B.GetShape().size() == 4);
         size_t firstSize = (A.GetLayout() == GetTensorLayout()) ? A.GetShape()[0] : A.GetShape().back();
         for (size_t i = 0; i < firstSize; ++i) {
            TMatrixT<AFloat> matIn = A.At(i).GetMatrix(); // convert the i-th slice to a (D,HW) matrix
            TCudaTensor<AFloat> tmpOut = B.At(i);         // the corresponding (D,HW) output slice
            TCudaTensor<AFloat> tmpIn(matIn.GetMatrixArray(), tmpOut.GetShape(), tmpOut.GetLayout());
            Copy(tmpOut, tmpIn);
         }
      } else {
         // getting a tensor from a matrix (e.g. CNN or RNN weights)
         TMatrixT<AFloat> tmp = A;
         TCudaMatrix<AFloat> tmp2(tmp);
         TCudaTensor<AFloat> tA(tmp2);
         Copy(B, tA);
      }
   } else {
      TMatrixT<AFloat> tmp = A;
      TCudaMatrix<AFloat> tmp2(tmp);
      TCudaTensor<AFloat> tA(tmp2);
      Copy(B, tA);
   }
}
//____________________________________________________________________________
template <typename AFloat>
template <typename AMatrix>
void TCudnn<AFloat>::CopyWeightsDiffArch(TCudaTensor<AFloat> &B, const AMatrix &A)
{
   // copy from another architecture using the reference one; this is not very
   // efficient since it creates temporary objects
   TMatrixT<AFloat> tmp = A;
   // transpose when the layouts differ
   if (B.GetLayout() == GetTensorLayout()) {
      tmp.T();
   }
   TCudaMatrix<AFloat> tmp2(tmp);
   TCudaTensor<AFloat> tA(tmp2);
   Copy(B, tA);
}
//____________________________________________________________________________
template <typename AFloat>
template <typename AMatrix_t>
void TCudnn<AFloat>::CopyDiffArch(std::vector<Tensor_t> &B, const std::vector<AMatrix_t> &A)
{
   for (size_t i = 0; i < B.size(); ++i) {
      CopyWeightsDiffArch(B[i], A[i]);
   }
}
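/* Illustrative sketch: the CopyDiffArch overloads transfer weights produced by
 * another architecture (typically the CPU reference backend) into cuDNN
 * tensors, e.g. when a trained network is evaluated on the GPU. 'gpuWeights'
 * and 'cpuWeights' are hypothetical containers of matching sizes and shapes.
 * \code
 * std::vector<TCudnn<float>::Tensor_t> gpuWeights = ...; // allocated cuDNN weight tensors
 * std::vector<TMatrixT<float>> cpuWeights = ...;         // weights from the reference backend
 * TCudnn<float>::CopyDiffArch(gpuWeights, cpuWeights);
 * \endcode
 */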
//____________________________________________________________________________
template <typename AFloat>
void TCudnn<AFloat>::PrintTensor(const typename TCudnn<AFloat>::Tensor_t &A,
                                 const std::string name, bool truncate)
{
   std::cout << name << " size = " << A.GetSize() << " shape = { ";
   auto shape = A.GetShape();
   for (size_t k = 0; k < shape.size() - 1; ++k)
      std::cout << shape[k] << " , ";
   std::cout << shape.back() << " } ";
   std::cout << " strides = { ";
   auto strides = A.GetStrides();
   for (size_t k = 0; k < strides.size() - 1; ++k)
      std::cout << strides[k] << " , ";
   std::cout << strides.back() << " }\n ";

   if (A.GetShape().size() == 2) {
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         std::cout << "{ ";
         size_t n = A.GetShape()[1];
         if (truncate) n = std::min(n, size_t(10));
         for (size_t j = 0; j < n; ++j) {
            std::cout << A(i, j) << " ";
         }
         if (truncate && n < A.GetShape()[1]) std::cout << " ...... ";
         std::cout << " } " << std::endl;
      }
   } else if (A.GetShape().size() == 3) {
      for (size_t i = 0; i < A.GetFirstSize(); ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetHSize(); ++j) {
            std::cout << "{ ";
            size_t n = A.GetWSize();
            if (truncate) n = std::min(n, size_t(10));
            for (size_t k = 0; k < n; ++k) {
               std::cout << A(i, j, k) << " ";
            }
            if (truncate && n < A.GetWSize()) std::cout << " ...... ";
            std::cout << " } " << std::endl;
         }
         std::cout << " } " << std::endl;
      }
   } else if (A.GetShape().size() == 4) {
      for (size_t i = 0; i < A.GetShape()[0]; ++i) {
         std::cout << "{ ";
         for (size_t j = 0; j < A.GetShape()[1]; ++j) {
            std::cout << "{ ";
            for (size_t k = 0; k < A.GetShape()[2]; ++k) {
               size_t n = A.GetShape()[3];
               if (truncate) n = std::min(n, size_t(10));
               for (size_t l = 0; l < n; ++l) {
                  std::cout << A(i, j, k, l) << " ";
               }
               if (truncate && n < A.GetShape()[3]) std::cout << " ...... ";
               std::cout << " } " << std::endl;
            }
            std::cout << " } " << std::endl;
         }
         std::cout << " } " << std::endl;
      }
   } else {
      for (size_t l = 0; l < A.GetSize(); ++l) {
         std::cout << A.GetData()[l] << " ";
      }
      std::cout << "\n";
   }
}
//____________________________________________________________________________
template <typename AFloat>
void TCudnn<AFloat>::PrintTensor4dDescriptor(TensorDescriptor_t descriptor)
{
   int n, c, h, w = 0;
   int s1, s2, s3, s4 = 0;
   cudnnDataType_t dataType;
   cudnnGetTensor4dDescriptor(descriptor, &dataType, &n, &c, &h, &w, &s1, &s2, &s3, &s4);
   std::cout << "Descriptor for 4d tensor of shape { " << n << " , " << c << " , " << h << " , " << w << " }"
             << " and strides { " << s1 << " , " << s2 << " , " << s3 << " , " << s4 << " }" << std::endl;
}
//____________________________________________________________________________
template <typename AFloat>
void TCudnn<AFloat>::PrintTensorNdDescriptor(TensorDescriptor_t descriptor, int ndim)
{
   int n = 0;
   std::vector<int> dims(ndim);
   std::vector<int> strides(ndim);
   cudnnDataType_t dataType;
   cudnnGetTensorNdDescriptor(descriptor, ndim, &dataType, &n, dims.data(), strides.data());
   // n is the actual number of dimensions stored in the descriptor
   dims.resize(n);
   strides.resize(n);
   std::cout << "Descriptor for Nd tensor of dim = " << n << " shape { ";
   for (auto d : dims)
      std::cout << d << " , ";
   std::cout << "} and strides { ";
   for (auto s : strides)
      std::cout << s << " , ";
   std::cout << " }" << std::endl;
}
// Initialize the CNN options: the default value of -1 leaves all algorithm and
// workspace choices to the cuDNN heuristics.
template <typename AFloat>
int TCudnn<AFloat>::CNNOptions::ConvFwdAlgorithm = -1;
template <typename AFloat>
int TCudnn<AFloat>::CNNOptions::ConvBwdDataAlgorithm = -1;
template <typename AFloat>
int TCudnn<AFloat>::CNNOptions::ConvBwdFilterAlgorithm = -1;
template <typename AFloat>
Long_t TCudnn<AFloat>::CNNOptions::ConvMaxWorkspaceSize = -1;

} // namespace DNN
} // namespace TMVA

#endif // R__HAS_CUDNN
#endif // TMVA_DNN_ARCHITECTURES_CUDNN