template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
   Tensor_t &data = L->GetReshapedData();
   if (L->GetNormAxis() == -1 && L->GetBatchSize() == outputTensor.GetShape()[0] &&
       L->GetDepth() == 1 && L->GetHeight() == 1) {
      // batch-norm after a dense layer: view the output as a 4D tensor {1, W, 1, B}
      Tensor_t &data = L->GetReshapedData();
      data = Tensor_t(outputTensor.GetDeviceBuffer(), {1, L->GetWidth(), 1, L->GetBatchSize()},
                      GetTensorLayout(), 0, 0);
   } else if (L->GetNormAxis() == 1) {
      // batch-norm after a convolutional layer: keep the NCHW shape
      Tensor_t tmp({L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()},
                   GetTensorLayout(), 0, 0);
      tmp.PrintShape("tmp");
      data = Tensor_t(outputTensor.GetDeviceBuffer(),
                      {L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()},
                      GetTensorLayout(), 0, 0);
      // ...
                      {L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()},
                      GetTensorLayout(), 0, 0);
   }
   // ...
   data.PrintShape("reshaped data");
   // ...
      data.GetTensorDescriptor(),
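// --- Illustrative sketch, not part of the original file: how a batch-normalization
// parameter descriptor is typically derived in cuDNN from a data descriptor like the
// data.GetTensorDescriptor() argument above. `xDesc` is a hypothetical placeholder.
#include <cudnn.h>

cudnnTensorDescriptor_t DeriveBNParamDescriptorSketch(cudnnTensorDescriptor_t xDesc)
{
   cudnnTensorDescriptor_t bnDesc;
   cudnnCreateTensorDescriptor(&bnDesc);
   // CUDNN_BATCHNORM_SPATIAL derives 1 x C x 1 x 1 parameter tensors, i.e. one
   // gamma/beta/mean/variance entry per channel, matching the NormAxis == 1 branch.
   cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
   return bnDesc;
}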
template <typename AFloat>
// ...
                                                    EActivationFunction activFunc, double coef)
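// --- Illustrative sketch, not part of the original file: the cuDNN call that an
// activation-descriptor initializer with this signature typically wraps. The mapping
// of EActivationFunction to CUDNN_ACTIVATION_RELU shown here is an assumption.
#include <cudnn.h>

void InitActivationDescriptorSketch(cudnnActivationDescriptor_t &desc, double coef)
{
   cudnnCreateActivationDescriptor(&desc);
   // coef is the clipping ceiling for CLIPPED_RELU and the alpha of ELU;
   // it is ignored by plain RELU.
   cudnnSetActivationDescriptor(desc, CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, coef);
}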
template <typename AFloat>
   auto convDescriptors = new CNN::TCNNDescriptors<typename TCudnn<AFloat>::ConvLayer_t>();
   // ...
   InitializeActivationDescriptor(convDescriptors->HelperDescriptor, L->GetActivationFunction(), coef);
   // ...
                                              L->GetPaddingHeight(),
                                              L->GetPaddingWidth(),
   // ...
                                              L->GetFilterHeight(),
                                              L->GetFilterWidth()));
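// --- Illustrative sketch, not part of the original file: the descriptor setup that the
// L->GetPaddingHeight() / L->GetFilterWidth() argument fragments above feed into,
// assuming NCHW float tensors. All names here are hypothetical; the descriptors are
// assumed to be created with cudnnCreate*Descriptor beforehand.
#include <cudnn.h>

void SetupConvDescriptorsSketch(cudnnConvolutionDescriptor_t convDesc,
                                cudnnFilterDescriptor_t filterDesc,
                                int padH, int padW, int strideH, int strideW,
                                int outDepth, int inDepth, int filterH, int filterW)
{
   // padding, stride, dilation (1, 1); cross-correlation is the usual DL convention
   cudnnSetConvolution2dDescriptor(convDesc, padH, padW, strideH, strideW, 1, 1,
                                   CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
   // K x C x H x W filter layout, matching the weight-tensor reshape done later
   cudnnSetFilter4dDescriptor(filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
                              outDepth, inDepth, filterH, filterW);
}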
template <typename AFloat>
   auto poolDescriptors = new CNN::TCNNDescriptors<typename TCudnn<AFloat>::PoolingLayer_t>();
   // ...
                                               L->GetFilterHeight(),
   // ...
                                               L->GetStrideCols()));
   // ...
                           {L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()},
                           GetTensorLayout(), 0, 0);
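// --- Illustrative sketch, not part of the original file: the pooling-descriptor call
// that consumes the L->GetFilterHeight() / L->GetStrideCols() fragments above. Max
// pooling and zero padding are assumptions.
#include <cudnn.h>

void SetupPoolDescriptorSketch(cudnnPoolingDescriptor_t poolDesc,
                               int windowH, int windowW, int strideH, int strideW)
{
   cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN,
                               windowH, windowW, /*padH=*/0, /*padW=*/0,
                               strideH, strideW);
}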
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
template <typename AFloat>
// ...
   size_t memLimit = (CNNOptions::ConvMaxWorkspaceSize > 0)
                        ? static_cast<size_t>(CNNOptions::ConvMaxWorkspaceSize)
                        : 0;
   // ...
#if (CUDNN_VERSION >= 8000)
   // ...
   if (algoPref == algoPreference::fastest) {
      // ...
   } else if (algoPref == algoPreference::workspace_limit) {
      // ...
      size_t temp_memsize{std::numeric_limits<size_t>::max()};
      // ...
   }
   // ...
   if (CNNOptions::ConvMaxWorkspaceSize < 0) {
#if (CUDNN_VERSION >= 8000)
      // ...
   } else if (CNNOptions::ConvMaxWorkspaceSize == 0) {
#if (CUDNN_VERSION >= 8000)
      // ...
   }
   // ...
#if (CUDNN_VERSION >= 8000)
   // ...
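// --- Illustrative sketch, not part of the original file: how the fastest /
// workspace_limit preference above is typically realized on cuDNN >= 8 with
// cudnnGetConvolutionForwardAlgorithm_v7, which returns candidates sorted by expected
// speed. All names are hypothetical.
#include <cudnn.h>
#include <cstddef>

cudnnConvolutionFwdAlgo_t ChooseFwdAlgoSketch(cudnnHandle_t handle,
                                              cudnnTensorDescriptor_t xDesc,
                                              cudnnFilterDescriptor_t wDesc,
                                              cudnnConvolutionDescriptor_t convDesc,
                                              cudnnTensorDescriptor_t yDesc,
                                              size_t memLimit) // 0 => fastest, no limit
{
   int returned = 0;
   cudnnConvolutionFwdAlgoPerf_t perf[CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
   cudnnGetConvolutionForwardAlgorithm_v7(handle, xDesc, wDesc, convDesc, yDesc,
                                          CUDNN_CONVOLUTION_FWD_ALGO_COUNT, &returned, perf);
   for (int i = 0; i < returned; ++i) {
      if (perf[i].status != CUDNN_STATUS_SUCCESS) continue;
      // candidates are sorted by time, so the first viable entry is the fastest
      if (memLimit == 0 || perf[i].memory <= memLimit) return perf[i].algo;
   }
   return CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; // conservative fallback
}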
   Tensor_t &filters = L->GetWeightsAt(0);
   filters = Tensor_t(filters.GetDeviceBuffer(),
                      {L->GetDepth(), L->GetInputDepth(), L->GetFilterHeight(), L->GetFilterWidth()},
                      MemoryLayout::RowMajor, 0, 0);
   // ...
   Tensor_t &biases = L->GetBiasesAt(0);
   biases = Tensor_t(biases.GetDeviceBuffer(), {1, L->GetDepth(), 1, 1}, GetTensorLayout(), 0, 0);
   // ...
   outputTensor = Tensor_t(outputTensor.GetDeviceBuffer(),
                           {L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()},
                           GetTensorLayout(), 0, 0);
   // ...
                                          Tensor_t::GetDataType(),
                                          (int)L->GetBatchSize(),
                                          (int)L->GetInputDepth(),
                                          (int)L->GetInputHeight(),
                                          (int)L->GetInputWidth()));
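// --- Illustrative sketch, not part of the original file: the descriptor call that the
// Tensor_t::GetDataType(), (int)L->GetBatchSize(), ... argument block above belongs
// to, i.e. describing the layer input as an NCHW 4D tensor. Names are hypothetical.
#include <cudnn.h>

void DescribeInputSketch(cudnnTensorDescriptor_t inputDesc,
                         cudnnDataType_t dataType, int n, int c, int h, int w)
{
   cudnnSetTensor4dDescriptor(inputDesc, CUDNN_TENSOR_NCHW, dataType, n, c, h, w);
}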
#if (CUDNN_VERSION >= 8000)
   // ...
   std::cout << "CONV FWD Algo used for convolution of input shape { " << L->GetBatchSize()
             << " , " << L->GetInputDepth() << " , " << L->GetInputHeight() << " , "
             << L->GetInputWidth() << " } is "
   // ...
   if (CNNOptions::ConvFwdAlgorithm > 0) {
      // ...
      std::cout << " but force using " << convWorkspace->AlgorithmForward << std::endl;
   }
   // ...
   std::cerr << "Error allocating FWD CONV workspace of size " << convWorkspace->ForwardWorkspaceSize
             << " - probably running out of memory on the GPU" << std::endl;
   std::cout << " layer input shape is { " << L->GetBatchSize() << " , " << L->GetInputDepth() << " , "
             << L->GetInputHeight() << " , " << L->GetInputWidth() << " } " << std::endl;
#if (CUDNN_VERSION >= 8000)
   // ...
   std::cout << "CONV BWD Data Algo used is " << convWorkspace->AlgorithmBackward << std::endl;
   // ...
   if (CNNOptions::ConvBwdDataAlgorithm > 0) {
      // ...
      std::cout << " but force using " << convWorkspace->AlgorithmBackward << std::endl;
   }
   // ...
   std::cerr << "Error allocating BACKW DATA CONV workspace of size " << convWorkspace->BackwardWorkspaceSize
             << " - probably running out of memory on the GPU" << std::endl;
   std::cout << " layer input shape is { " << L->GetBatchSize() << " , " << L->GetInputDepth() << " , "
             << L->GetInputHeight() << " , " << L->GetInputWidth() << " } " << std::endl;
#if (CUDNN_VERSION >= 8000)
   // ...
   std::cout << "CONV BWD Filter Algo used is " << convWorkspace->HelperAlgorithm << std::endl;
   // ...
   if (CNNOptions::ConvBwdFilterAlgorithm > 0) {
      // ...
      std::cout << " but force using " << convWorkspace->HelperAlgorithm << std::endl;
   }
   // ...
   std::cerr << "Error allocating BACKW FILTER CONV workspace of size " << convWorkspace->BackwardWorkspaceSize
             << " - probably running out of memory on the GPU" << std::endl;
   std::cout << " layer input shape is { " << L->GetBatchSize() << " , " << L->GetInputDepth() << " , "
             << L->GetInputHeight() << " , " << L->GetInputWidth() << " } " << std::endl;
   filters.PrintShape("filterTensor");
template <typename AFloat>
// ...
                                          Tensor_t::GetDataType(),
                                          (int)L->GetBatchSize(),
                                          (int)L->GetInputDepth(),
                                          (int)L->GetInputHeight(),
                                          (int)L->GetInputWidth()));
   // ...
   std::cerr << "Error allocating POOL reserved dropout workspace of size " << poolWorkspace->HelperWorkspaceSize
             << " - probably running out of memory on the GPU" << std::endl;
   std::cout << " layer input shape is { " << L->GetBatchSize() << " , " << L->GetInputDepth() << " , "
             << L->GetInputHeight() << " , " << L->GetInputWidth() << " } " << std::endl;
   // ...
   std::cerr << "Error allocating POOL dropout state of size " << poolWorkspace->ForwardWorkspaceSize
             << " - probably running out of memory on the GPU" << std::endl;
   std::cout << " layer input shape is { " << L->GetBatchSize() << " , " << L->GetInputDepth() << " , "
             << L->GetInputHeight() << " , " << L->GetInputWidth() << " } " << std::endl;
   // ...
                                    L->GetDropoutProbability(),
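// --- Illustrative sketch, not part of the original file: the dropout state / reserve
// allocations whose failures would produce the POOL dropout messages above, and the
// descriptor setup that consumes L->GetDropoutProbability(). Names are hypothetical.
#include <cudnn.h>
#include <cuda_runtime.h>

bool SetupDropoutSketch(cudnnHandle_t handle, cudnnDropoutDescriptor_t dropDesc,
                        cudnnTensorDescriptor_t xDesc, float dropoutProb,
                        void **states, size_t *stateSize,
                        void **reserve, size_t *reserveSize)
{
   cudnnDropoutGetStatesSize(handle, stateSize);        // RNG state ("dropout state")
   cudnnDropoutGetReserveSpaceSize(xDesc, reserveSize); // per-tensor reserve space
   if (cudaMalloc(states, *stateSize) != cudaSuccess) return false;
   if (cudaMalloc(reserve, *reserveSize) != cudaSuccess) return false;
   return cudnnSetDropoutDescriptor(dropDesc, handle, dropoutProb,
                                    *states, *stateSize, /*seed=*/0ULL) == CUDNN_STATUS_SUCCESS;
}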
template <typename AFloat>
// ...
   if (!workspace) return;
   auto convWorkspace = static_cast<ConvWorkspace_t *>(workspace);
   // ...

template <typename AFloat>
// ...
   if (!workspace) return;
   auto poolWorkspace = static_cast<PoolingWorkspace_t *>(workspace);
template <typename AFloat>
// ...
                                        Matrix_t &gamma, Matrix_t &beta,
                                        Matrix_t &mean, Matrix_t &, Matrix_t &iVariance,
   // ...
                          x.GetTensorDescriptor(), x.GetDataPointer(),
                          y.GetTensorDescriptor(), y.GetDataPointer(),
                          // ...
                          gamma.GetDataPointer(), beta.GetDataPointer(),
                          // ...
                          epsilon, mean.GetDataPointer(), iVariance.GetDataPointer()));
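// --- Illustrative sketch, not part of the original file: the full argument order of
// cudnnBatchNormalizationForwardTraining, into which the x/y/gamma/beta/mean/iVariance
// fragments above slot. `momentum` and the descriptor names are hypothetical.
#include <cudnn.h>

void BNForwardTrainingSketch(cudnnHandle_t handle,
                             cudnnTensorDescriptor_t xDesc, const float *x,
                             cudnnTensorDescriptor_t yDesc, float *y,
                             cudnnTensorDescriptor_t bnDesc,
                             const float *gamma, const float *beta,
                             double momentum, float *runningMean, float *runningVar,
                             double epsilon, float *saveMean, float *saveInvVar)
{
   const float one = 1.0f, zero = 0.0f;
   cudnnBatchNormalizationForwardTraining(handle, CUDNN_BATCHNORM_SPATIAL, &one, &zero,
                                          xDesc, x, yDesc, y,
                                          bnDesc, gamma, beta,
                                          momentum, runningMean, runningVar,
                                          epsilon,
                                          saveMean, saveInvVar); // cached for backward
}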
template <typename AFloat>
// ...
                          x.GetTensorDescriptor(), x.GetDataPointer(),
                          y.GetTensorDescriptor(), y.GetDataPointer(),
                          // ...
                          gamma.GetDataPointer(), beta.GetDataPointer(),
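// --- Illustrative sketch, not part of the original file: the inference-time variant,
// which consumes the stored running mean/variance instead of per-batch statistics.
#include <cudnn.h>

void BNForwardInferenceSketch(cudnnHandle_t handle,
                              cudnnTensorDescriptor_t xDesc, const float *x,
                              cudnnTensorDescriptor_t yDesc, float *y,
                              cudnnTensorDescriptor_t bnDesc,
                              const float *gamma, const float *beta,
                              const float *runningMean, const float *runningVar,
                              double epsilon)
{
   const float one = 1.0f, zero = 0.0f;
   cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL, &one, &zero,
                                           xDesc, x, yDesc, y, bnDesc, gamma, beta,
                                           runningMean, runningVar, epsilon);
}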
template <typename AFloat>
// ...
                                 Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean,
   // ...
                          x.GetTensorDescriptor(), x.GetDataPointer(),
                          dy.GetTensorDescriptor(), dy.GetDataPointer(),
                          dx.GetTensorDescriptor(), dx.GetDataPointer(),
                          // ...
                          epsilon, mean.GetDataPointer(), iVariance.GetDataPointer()));
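// --- Illustrative sketch, not part of the original file: cudnnBatchNormalizationBackward
// computes dx, dgamma and dbeta in one call, reusing the mean / inverse variance saved
// in the forward pass (the mean / iVariance arguments above). Names are hypothetical.
#include <cudnn.h>

void BNBackwardSketch(cudnnHandle_t handle,
                      cudnnTensorDescriptor_t xDesc, const float *x,
                      cudnnTensorDescriptor_t dyDesc, const float *dy,
                      cudnnTensorDescriptor_t dxDesc, float *dx,
                      cudnnTensorDescriptor_t bnDesc, const float *gamma,
                      float *dgamma, float *dbeta,
                      double epsilon, const float *savedMean, const float *savedInvVar)
{
   const float one = 1.0f, zero = 0.0f;
   cudnnBatchNormalizationBackward(handle, CUDNN_BATCHNORM_SPATIAL,
                                   &one, &zero, // data-gradient blend factors
                                   &one, &zero, // parameter-gradient blend factors
                                   xDesc, x, dyDesc, dy, dxDesc, dx,
                                   bnDesc, gamma, dgamma, dbeta,
                                   epsilon, savedMean, savedInvVar);
}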
template <typename AFloat>
// ...
                                      const Tensor_t &input,
                                      const Matrix_t &weights, const Matrix_t &biases,
   // ...
                                      ConvWorkspace_t &workspace)
   // ...
   std::vector<size_t> shape_input = {size_t(n), size_t(c), size_t(h), size_t(w)};
   // ...
   std::vector<size_t> shape_output = {size_t(n), size_t(c), size_t(h), size_t(w)};
   // ...
                          input.GetTensorDescriptor(), input.GetDataPointer(),
                          // ...
                          weights.GetDataPointer(),
                          // ...
                          workspace.AlgorithmForward,
                          workspace.ForwardWorkspace,
                          workspace.ForwardWorkspaceSize,
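// --- Illustrative sketch, not part of the original file: the cudnnConvolutionForward
// call that the input / weights / workspace.AlgorithmForward fragments above belong to,
// followed by cudnnAddTensor to add the 1 x C x 1 x 1 bias. Names are hypothetical.
#include <cudnn.h>

void ConvForwardSketch(cudnnHandle_t handle,
                       cudnnTensorDescriptor_t xDesc, const float *x,
                       cudnnFilterDescriptor_t wDesc, const float *w,
                       cudnnConvolutionDescriptor_t convDesc,
                       cudnnConvolutionFwdAlgo_t algo, void *workspace, size_t workspaceSize,
                       cudnnTensorDescriptor_t yDesc, float *y,
                       cudnnTensorDescriptor_t bDesc, const float *bias)
{
   const float one = 1.0f, zero = 0.0f;
   cudnnConvolutionForward(handle, &one, xDesc, x, wDesc, w, convDesc,
                           algo, workspace, workspaceSize, &zero, yDesc, y);
   // beta = 1 accumulates the broadcast bias into the convolution output
   cudnnAddTensor(handle, &one, bDesc, bias, &one, yDesc, y);
}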
template <typename AFloat>
// ...
                                       const Matrix_t &weights,
   // ...
                                       ConvWorkspace_t &workspace,
   // ...
   const AFloat alpha = 1.0;
   const AFloat beta = 0.0;
   // ...
                          weights.GetDataPointer(),
                          // ...
                          workspace.AlgorithmBackward,
                          workspace.BackwardWorkspace,
                          workspace.BackwardWorkspaceSize,
   // ...
                          workspace.HelperAlgorithm, workspace.HelperWorkspace,
                          workspace.HelperWorkspaceSize, &beta,
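// --- Illustrative sketch, not part of the original file: the three backward calls that
// workspace.AlgorithmBackward (data), workspace.HelperAlgorithm (filter) and the bias
// reduction map onto, with alpha = 1 and beta = 0 as above. Names are hypothetical.
#include <cudnn.h>

void ConvBackwardSketch(cudnnHandle_t handle,
                        cudnnTensorDescriptor_t xDesc, const float *x,
                        cudnnFilterDescriptor_t wDesc, const float *w, float *dw,
                        cudnnTensorDescriptor_t dyDesc, const float *dy,
                        cudnnConvolutionDescriptor_t convDesc,
                        cudnnTensorDescriptor_t dxDesc, float *dx,
                        cudnnTensorDescriptor_t dbDesc, float *db,
                        cudnnConvolutionBwdDataAlgo_t dataAlgo, void *dataWs, size_t dataWsSize,
                        cudnnConvolutionBwdFilterAlgo_t filtAlgo, void *filtWs, size_t filtWsSize)
{
   const float one = 1.0f, zero = 0.0f;
   // gradient w.r.t. the layer input
   cudnnConvolutionBackwardData(handle, &one, wDesc, w, dyDesc, dy, convDesc,
                                dataAlgo, dataWs, dataWsSize, &zero, dxDesc, dx);
   // gradient w.r.t. the filters
   cudnnConvolutionBackwardFilter(handle, &one, xDesc, x, dyDesc, dy, convDesc,
                                  filtAlgo, filtWs, filtWsSize, &zero, wDesc, dw);
   // gradient w.r.t. the bias: a reduction of dy over N, H, W
   cudnnConvolutionBackwardBias(handle, &one, dyDesc, dy, &zero, dbDesc, db);
}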
   for (size_t event = 0; event < batchSize; event++) {
      // ...
   }
template <typename AFloat>
// ...
template <typename AFloat>
// ...
                                PoolingWorkspace_t &workspace,
   // ...
   const AFloat alpha = 1.0;
   const AFloat beta = 0.0;
   // ...
                          C.GetTensorDescriptor(),
                          // ...
                          A.GetTensorDescriptor(), A.GetDataPointer()));
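// --- Illustrative sketch, not part of the original file: the cudnnPoolingForward call
// that the C / A descriptor fragments above feed, with alpha = 1 and beta = 0 as set
// above. Names are hypothetical.
#include <cudnn.h>

void PoolForwardSketch(cudnnHandle_t handle, cudnnPoolingDescriptor_t poolDesc,
                       cudnnTensorDescriptor_t xDesc, const float *x,
                       cudnnTensorDescriptor_t yDesc, float *y)
{
   const float one = 1.0f, zero = 0.0f;
   cudnnPoolingForward(handle, poolDesc, &one, xDesc, x, &zero, yDesc, y);
}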
template <typename AFloat>
// ...
                                PoolingWorkspace_t &workspace,
   // ...
   const AFloat alpha = 1.0;
   const AFloat beta = 0.0;
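// --- Illustrative sketch, not part of the original file: the backward counterpart.
// cudnnPoolingBackward needs both forward tensors (x, y) besides dy in order to route
// max-pooling gradients back to the winning input elements. Names are hypothetical.
#include <cudnn.h>

void PoolBackwardSketch(cudnnHandle_t handle, cudnnPoolingDescriptor_t poolDesc,
                        cudnnTensorDescriptor_t yDesc, const float *y,
                        cudnnTensorDescriptor_t dyDesc, const float *dy,
                        cudnnTensorDescriptor_t xDesc, const float *x,
                        cudnnTensorDescriptor_t dxDesc, float *dx)
{
   const float one = 1.0f, zero = 0.0f;
   cudnnPoolingBackward(handle, poolDesc, &one, yDesc, y, dyDesc, dy,
                        xDesc, x, &zero, dxDesc, dx);
}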
template <typename AFloat>
// ...
template <typename AFloat>
// ...