The TCuda architecture class.
Low-level interface class for CUDA computing architectures. It declares, as public types, the scalar, matrix, tensor and buffer types for this architecture, and provides the remaining functions of the low-level interface as static member functions.
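The class is normally passed as the architecture template parameter of the TMVA DNN classes rather than used directly. A minimal sketch of direct use of the static interface (assuming a ROOT build with CUDA enabled, so that TMVA/DNN/Architectures/Cuda.h is available):

    #include "TMVA/DNN/Architectures/Cuda.h"

    using Architecture_t = TMVA::DNN::TCuda<float>;
    using Matrix_t       = Architecture_t::Matrix_t;   // TCudaMatrix<float>

    int main()
    {
       Matrix_t A(4, 3), B(3, 5), C(4, 5);       // device-resident matrices
       Architecture_t::InitializeGauss(A);       // fill with Gaussian random values
       Architecture_t::InitializeGauss(B);
       Architecture_t::Multiply(C, A, B);        // C = A * B, executed on the GPU
       return 0;
    }

Later sketches on this page reuse the Architecture_t and Matrix_t aliases defined here.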
Public Member Functions

template <typename AMatrix_t>
void CopyDiffArch(std::vector<TCudaMatrix<AFloat>> &B, const std::vector<AMatrix_t> &A)
template <typename AMatrix_t>
void CopyDiffArch(TCudaMatrix<AFloat> &B, const AMatrix_t &A)
void Multiply(TCudaMatrix<double> &C, const TCudaMatrix<double> &A, const TCudaMatrix<double> &B)
void Multiply(TCudaMatrix<float> &C, const TCudaMatrix<float> &A, const TCudaMatrix<float> &B)
void MultiplyTranspose(TCudaMatrix<double> &output, const TCudaMatrix<double> &input, const TCudaMatrix<double> &Weights)
void MultiplyTranspose(TCudaMatrix<float> &output, const TCudaMatrix<float> &input, const TCudaMatrix<float> &Weights)
void ScaleAdd(TCudaMatrix<double> &B, const TCudaMatrix<double> &A, double alpha)
void ScaleAdd(TCudaMatrix<float> &B, const TCudaMatrix<float> &A, float alpha)
void SumColumns(TCudaMatrix<double> &B, const TCudaMatrix<double> &A, double alpha, double beta)
void SumColumns(TCudaMatrix<float> &B, const TCudaMatrix<float> &A, float alpha, float beta)
void SumRows(TCudaMatrix<double> &B, const TCudaMatrix<double> &A)
void SumRows(TCudaMatrix<float> &B, const TCudaMatrix<float> &A)
void TransposeMultiply(TCudaMatrix<double> &C, const TCudaMatrix<double> &A, const TCudaMatrix<double> &B, double alpha, double beta)
void TransposeMultiply(TCudaMatrix<float> &C, const TCudaMatrix<float> &A, const TCudaMatrix<float> &B, float alpha, float beta)
Static Public Member Functions

static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
static void CreateWeightTensors(std::vector<Matrix_t> &newWeights, const std::vector<Matrix_t> &weights)
static void FreeConvWorkspace(TWorkspace *&)
Only used for certain cuDNN on-device memory.
static void FreePoolDropoutWorkspace(TWorkspace *&)
static void FreeRNNWorkspace(TWorkspace *&)
static TMVA::Experimental::MemoryLayout GetTensorLayout()
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double = 0.0)
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
static void InitializeGRUTensors(GenLayer_t *)
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
static void InitializeLSTMTensors(GenLayer_t *)
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
static void InitializeRNNTensors(GenLayer_t *)
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
static bool IsCudnn()
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void ReleaseBNormDescriptors(TDescriptors *&)
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
static void ReleaseDescriptor(ActivationDescriptor_t &)
static void ReleasePoolDescriptors(TDescriptors *&)
static void ReleaseRNNDescriptors(TDescriptors *&)
Forward Propagation
Low-level functions required for the forward propagation of activations through the network.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
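Taken together, MultiplyTranspose and AddRowWise implement the affine part of a dense layer, output = input * W^T + biases. A hedged sketch of how a forward step could be composed from them (Architecture_t and Matrix_t as in the example above):

    // Dense-layer forward pass: output = input * W^T, then add biases row-wise.
    void DenseForward(Matrix_t &output, const Matrix_t &input,
                      const Matrix_t &weights, const Matrix_t &biases)
    {
       Architecture_t::MultiplyTranspose(output, input, weights);
       Architecture_t::AddRowWise(output, biases);
    }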
Backward Propagation (Dense Layers)
Low-level functions required for the backward propagation of gradients through the network.
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta = 1.0)
Adds the elements in matrix B, scaled by beta, to the elements in matrix A (a usage sketch follows this list).
static void Copy(Matrix_t &B, const Matrix_t &A)
template <typename AMatrix_t>
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
static void ScaleAdd(Tensor_t &A, const Tensor_t &B, Scalar_t beta = 1.0)
The above functions extended to tensors.
static void Copy(Tensor_t &A, const Tensor_t &B)
template <typename ATensor_t>
static void CopyDiffArch(Tensor_t &A, const ATensor_t &B)
template <typename AMatrix_t>
static void CopyDiffArch(std::vector<Matrix_t> &A, const std::vector<AMatrix_t> &B)
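ScaleAdd is the building block of the gradient-descent weight update: adding the gradients scaled by a negative learning rate performs one SGD step. A sketch, assuming weights and weightGradients have matching shapes:

    // One plain SGD step: weights <- weights - learningRate * weightGradients
    void SgdStep(Matrix_t &weights, const Matrix_t &weightGradients,
                 Architecture_t::Scalar_t learningRate)
    {
       Architecture_t::ScaleAdd(weights, weightGradients, -learningRate);
    }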
Activation Functions
For each activation function, the low-level interface contains two routines: one that applies the activation function to a matrix, and one that evaluates the derivatives of the activation function at the elements of a given matrix and writes the results into the result matrix (a usage sketch follows this list).
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef = 0.0, const AFloat alpha = 1, const AFloat beta = 0)
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const AFloat alpha = 1, const AFloat beta = 0)
Computes the gradient of the activation function.
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static void Relu(Tensor_t &B)
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
static void Sigmoid(Tensor_t &B)
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static void Tanh(Tensor_t &B)
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void FastTanh(Tensor_t &B)
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
static void SymmetricRelu(Tensor_t &B)
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void SoftSign(Tensor_t &B)
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static void Gauss(Tensor_t &B)
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
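A short sketch of the convention: the forward routines act in place on their argument, while the derivative routines write f'(A) into B. For ReLU, for example (the tensor shape is illustrative, and the derivative is taken before the in-place forward call overwrites the pre-activations):

    auto X  = Architecture_t::CreateTensor(32, 1, 1, 16);  // pre-activations
    auto dX = Architecture_t::CreateTensor(32, 1, 1, 16);
    Architecture_t::ReluDerivative(dX, X);  // dX_ij = 1 if X_ij > 0, else 0
    Architecture_t::Relu(X);                // X <- max(X, 0), element-wise, in place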
Loss Functions
Loss functions compute a scalar value given the true labels Y and the output of the net. For each function, a routine that computes the gradients (suffixed by Gradients) must also be provided as the starting point of the backpropagation algorithm (a usage sketch follows this list).
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
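A sketch of how a loss and its gradients pair up at the start of backpropagation, assuming Y (targets), output (predictions) and weights (per-event weights) already exist, and assuming the usual GetNrows/GetNcols accessors on Matrix_t:

    auto loss = Architecture_t::MeanSquaredError(Y, output, weights);  // scalar loss
    Matrix_t dY(Y.GetNrows(), Y.GetNcols());
    Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); // seeds backpropagation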
Output Functions
Output functions transform the activations of the last layer in the network into a valid prediction YHat: the identity function for regression, or the sigmoid transformation for two-class classification.
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
static void Softmax(Matrix_t &YHat, const Matrix_t &)
Regularization
For each regularization type, two functions are required: one named <Type>Regularization that evaluates the corresponding regularization functional for a given weight matrix, and one named Add<Type>RegularizationGradients that adds the regularization component to the gradients in the provided matrix (a usage sketch follows this list).
static Scalar_t L1Regularization(const Matrix_t &W)
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static Scalar_t L2Regularization(const Matrix_t &W)
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
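Sketch of the intended pairing, here for L2 with weight-decay strength weightDecay (W and dW are assumed to exist): the functional contributes to the loss, and the Add...Gradients routine adds its derivative to the existing weight gradients:

    auto reg = weightDecay * Architecture_t::L2Regularization(W);      // add to the loss
    Architecture_t::AddL2RegularizationGradients(dW, W, weightDecay);  // dW += d(reg)/dW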
Initialization
For each initialization method, one function in the low-level interface is provided. The naming scheme is Initialize<Type> for a given initialization method Type (a usage sketch follows this list).
static void InitializeGauss(Matrix_t &A)
static void InitializeUniform(Matrix_t &A)
static void InitializeIdentity(Matrix_t &A)
static void InitializeZero(Matrix_t &A)
static void InitializeZero(Tensor_t &A)
static void InitializeGlorotNormal(Matrix_t &A)
Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled from a normal distribution with stddev = sqrt(2/(N_input + N_output)), and values larger than 2 * stddev are discarded. See Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf.
static void InitializeGlorotUniform(Matrix_t &A)
Sample from a uniform distribution in the range [-lim, +lim], where lim = sqrt(6/(N_in + N_out)).
static TRandom &GetRandomGenerator()
static void SetRandomSeed(size_t seed)
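A sketch of seeding the generator and initializing a weight matrix (the seed value is arbitrary and W is assumed to exist):

    Architecture_t::SetRandomSeed(1234);         // make initialization reproducible
    Architecture_t::InitializeGlorotUniform(W);  // W ~ U(-lim, +lim), lim = sqrt(6/(N_in + N_out))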
Dropout
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by the reciprocal of p.
static void DropoutForward(Matrix_t &A, Scalar_t p)
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
Batch Normalization Layer Propagation
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled by two parameters, different for each input variable: gamma and beta (see the formulas below).
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean, but with the previously computed running mean and variance.
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
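For reference, the training-time transformation these routines implement is the standard batch normalization of Ioffe & Szegedy; in LaTeX form:

    \hat{x} = \frac{x - \mu_B}{\sqrt{\sigma_B^2 + \epsilon}}, \qquad y = \gamma \hat{x} + \beta

where \mu_B and \sigma_B^2 are the batch mean and variance, \epsilon is the epsilon argument, and the running mean and variance are updated with the given momentum for later use in BatchNormLayerForwardInference.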
Forward Propagation in Convolutional Layer
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters (see the sketch after this list).
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector<int> &V)
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores it in the matrix A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
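calculateDimension presumably evaluates the standard convolution output-size formula, outDim = (imgDim - fltDim + 2 * padding) / stride + 1. A small sketch (the concrete numbers are illustrative):

    // A 32-pixel-wide input, 5x5 kernel, padding 2, stride 1 keeps the width at 32:
    size_t outWidth = Architecture_t::calculateDimension(/*imgDim=*/32, /*fltDim=*/5,
                                                         /*padding=*/2, /*stride=*/1); // -> 32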
Backward Propagation in Convolutional Layer
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
Forward Propagation in Max Pooling Layer
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using the max operation, such that the winning indices are stored in matrix B.
Backward Propagation in Max Pooling Layer
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
Forward and Backward Propagation in Reshape Layer
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B, such that each matrix is stretched into one row, resulting in a matrix A.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
static Matrix_t &RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
static Matrix_t &LSTMLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &)
static Matrix_t &GRULayerBackward(Matrix_t &state_gradients_backward, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Matrix_t &, bool)
Backward pass for GRU Network.
Additional Arithmetic Functions
Additional arithmetic on CUDA matrices used to implement the low-level interface.
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B, with the result being written into C.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha = 1.0, Scalar_t beta = 0.)
Matrix multiplication of two matrices A and B^T (transposed), with the result being written into C.
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B, with the result being written into A.
static void Hadamard(Matrix_t &A, const Matrix_t &B)
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.)
Sum columns of the (m x n) matrix A and write the results into the first m elements of B.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void ReciprocalElementWise(Matrix_t &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
static void SqrtElementWise(Matrix_t &A)
Take the square root of each element of the matrix A and write the result into A.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates (see the sketch after this list).
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void PrintTensor(const Tensor_t &A, const std::string name = "Cuda-tensor", bool = false)
static void SumRows(Matrix_t &B, const Matrix_t &A)
Extra functions defined only for the CPU architecture.
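A hedged sketch of how the three Adam helpers could combine into one optimizer step; the exact update conventions (bias correction, in particular) are assumptions here, not guaranteed by the interface:

    // M, V: first and second moment estimates; dW: current gradients.
    Architecture_t::AdamUpdateFirstMom(M, dW, beta1);   // assumed: M <- beta1*M + (1-beta1)*dW
    Architecture_t::AdamUpdateSecondMom(V, dW, beta2);  // assumed: V <- beta2*V + (1-beta2)*dW^2
    Architecture_t::AdamUpdate(W, M, V, alpha, eps);    // assumed: W <- W - alpha*M/(sqrt(V)+eps)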
Static Private Attributes
static TRandom *fgRandomGen = nullptr
#include <TMVA/DNN/Architectures/Cuda.h>
Member Typedef Documentation

using TMVA::DNN::TCuda<AReal>::ActivationDescriptor_t = CudaActivationDescriptor
using TMVA::DNN::TCuda<AReal>::AFloat = AReal
using TMVA::DNN::TCuda<AReal>::AlgorithmBackward_t = CudaConvolutionBwdDataAlgo
using TMVA::DNN::TCuda<AReal>::AlgorithmDataType_t = DummyCudaDataType
using TMVA::DNN::TCuda<AReal>::AlgorithmForward_t = CudaConvolutionFwdAlgo
using TMVA::DNN::TCuda<AReal>::AlgorithmHelper_t = CudaConvolutionBwdFilterAlgo
using TMVA::DNN::TCuda<AReal>::BNormDescriptors_t = TDNNGenDescriptors<BNormLayer_t>
using TMVA::DNN::TCuda<AReal>::BNormLayer_t = TBatchNormLayer<TCuda<AReal>>
using TMVA::DNN::TCuda<AReal>::ConvDescriptors_t = CNN::TCNNDescriptors<ConvLayer_t>
using TMVA::DNN::TCuda<AReal>::ConvLayer_t = CNN::TConvLayer<TCuda<AReal>>
using TMVA::DNN::TCuda<AReal>::ConvolutionDescriptor_t = CudaConvolutionDescriptor
using TMVA::DNN::TCuda<AReal>::ConvWorkspace_t = CNN::TCNNWorkspace<ConvLayer_t>
using TMVA::DNN::TCuda<AReal>::DeviceBuffer_t = TCudaDeviceBuffer<AFloat>
using TMVA::DNN::TCuda<AReal>::DropoutDescriptor_t = CudaDropoutDescriptor
using TMVA::DNN::TCuda<AReal>::EmptyDescriptor_t = CudaEmptyDescriptor
using TMVA::DNN::TCuda<AReal>::FilterDescriptor_t = CudaFilterDescriptor
using TMVA::DNN::TCuda<AReal>::GenLayer_t = VGeneralLayer<TCuda<AReal>>
using TMVA::DNN::TCuda<AReal>::HostBuffer_t = TCudaHostBuffer<AFloat>
using TMVA::DNN::TCuda<AReal>::Matrix_t = TCudaMatrix<AFloat>
using TMVA::DNN::TCuda<AReal>::PoolingDescriptor_t = CudaPoolingDescriptor
using TMVA::DNN::TCuda<AReal>::PoolingDescriptors_t = CNN::TCNNDescriptors<PoolingLayer_t>
using TMVA::DNN::TCuda<AReal>::PoolingLayer_t = CNN::TMaxPoolLayer<TCuda<AReal>>
using TMVA::DNN::TCuda<AReal>::PoolingWorkspace_t = CNN::TCNNWorkspace<PoolingLayer_t>
using TMVA::DNN::TCuda<AReal>::RecurrentDescriptor_t = DummyCudaDataType
using TMVA::DNN::TCuda<AReal>::ReduceTensorDescriptor_t = DummyCudaDataType
using TMVA::DNN::TCuda<AReal>::RNNDescriptors_t = RNN::TRNNDescriptors<TCuda<AReal>>
using TMVA::DNN::TCuda<AReal>::RNNWorkspace_t = RNN::TRNNWorkspace<TCuda<AReal>>
using TMVA::DNN::TCuda<AReal>::Scalar_t = AFloat
using TMVA::DNN::TCuda<AReal>::Tensor_t = TCudaTensor<AFloat>
using TMVA::DNN::TCuda<AReal>::TensorDescriptor_t = DummyCudaDataType
Member Function Documentation

ActivationFunctionBackward()
Computes the gradient of the activation function.
Definition at line 37 of file ActivationFunctions.cu.
AdamUpdate()
Adam updates.
Definition at line 425 of file Arithmetic.cu.
AddRowWise()
Add the vectors biases row-wise to the matrix output.
AlmostEquals()
Check two matrices for equality, taking floating point arithmetic errors into account.
Checks two matrices for element-wise equality.
Parameters:
AFloat: An architecture-specific floating point number type.
A: The first matrix.
B: The second matrix.
epsilon: Equality tolerance, needed to address floating point arithmetic.
Definition at line 291 of file Arithmetic.cu.
Backward()
Perform the complete backward propagation step.
If the provided activationGradientsBackward matrix is not empty, compute the gradients of the objective function with respect to the activations of the previous layer (backward direction). Also compute the weight and the bias gradients. Modifies the values in df, and thus produces a valid result only the first time it is applied after the corresponding forward propagation has been performed.
Definition at line 91 of file Propagation.cu.
BatchNormLayerForwardInference()
During inference the inputs are not normalized using the batch mean, but with the previously computed running mean and variance.
Definition at line 743 of file Propagation.cu.
BatchNormLayerForwardTraining()
The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled by two parameters, different for each input variable: gamma and beta.
Definition at line 729 of file Propagation.cu.
CalculateConvActivationGradients()
Utility function for calculating the activation gradients of the layer before the convolutional layer.
Definition at line 324 of file Propagation.cu.
CalculateConvBiasGradients()
Utility function for calculating the bias gradients of the convolutional layer.
Definition at line 416 of file Propagation.cu.
CalculateConvWeightGradients()
Utility function for calculating the weight gradients of the convolutional layer.
Definition at line 369 of file Propagation.cu.
calculateDimension()
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters.
Definition at line 151 of file Propagation.cu.
ConstAdd()
Add the constant beta to all the elements of matrix A and write the result into A.
Definition at line 357 of file Arithmetic.cu.
ConstMult()
Multiply all the elements of matrix A by the constant beta and write the result into A.
Definition at line 371 of file Arithmetic.cu.
ConvLayerBackward()
Perform the complete backward propagation step in a Convolutional Layer.
If the provided activationGradientsBackward matrix is not empty, compute the gradients of the objective function with respect to the activations of the previous layer (backward direction). Also compute the weight and the bias gradients. Modifies the values in df, and thus produces a valid result only the first time it is applied after the corresponding forward propagation has been performed.
Definition at line 276 of file Propagation.cu.
ConvLayerForward()
Forward propagation in the Convolutional layer.
Definition at line 236 of file Propagation.cu.
CopyDiffArch()
void TMVA::DNN::TCuda<AReal>::CopyDiffArch(std::vector<TCudaMatrix<AFloat>> &B, const std::vector<AMatrix_t> &A)
void TMVA::DNN::TCuda<AReal>::CopyDiffArch(TCudaMatrix<AFloat> &B, const AMatrix_t &A)
CrossEntropy()
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.
Definition at line 68 of file LossFunctions.cu.
Deflatten()
Transforms each row of B to a matrix and stores it in the tensor A.
Deflatten a matrix into a vector of matrices.
Parameters:
[out] A: Output matrices. Each element will be a part of the input.
[in] B: Input flat matrix.
[in] size: Number of matrices in the output vector.
[in] nRows: Number of rows in each matrix of the output vector.
[in] nCols: Number of columns in each matrix of the output vector.
Each row in the input matrix is the concatenation of the same row in each of the output matrices. Passing an std::vector to a CUDA kernel is a non-trivial task that requires manually allocating and copying to device memory - details in comments within the function's body. Launches one thread per input element.
Definition at line 670 of file Propagation.cu.
Downsample()
Downsample the matrix C to the matrix A, using the max operation, such that the winning indices are stored in matrix B.
Downsampling function used as the forward propagation step of a Max-Pooling layer.
Parameters:
[out] A: The output matrix. Each row corresponds to a slice and each element is the max within a receptive field.
[out] B: The winning indices matrix. Each element is the index of the max element.
[in] C: The input matrix. Each row is a slice.
[in] imgHeight: The height of the input.
[in] imgWidth: The width of the input.
[in] fltHeight: Height of the kernel.
[in] fltWidth: Width of the kernel.
[in] strideRows: Stride size in the horizontal dimension.
[in] strideCols: Stride size in the vertical dimension.
Each output element is the maximum of the receptive field. We also save the winning indices to facilitate back-propagation: we need to know which input element influenced the output, and only apply the derivative correction to this particular element. The slicing process is the same as in a convolutional layer, except that padding is set to 0.
Definition at line 468 of file Propagation.cu.
DropoutForward()
Apply dropout with activation probability p to the given tensor A and scale the result by the reciprocal of p.
Flatten()
Flattens the tensor B, such that each matrix is stretched into one row, resulting in a matrix A.
Flatten a vector of matrices into a single matrix.
Parameters:
[out] A: Output matrix.
[in] B: Input vector. Each element is a matrix to be concatenated.
[in] size: Number of matrices in the input vector.
[in] nRows: Number of rows in each matrix of the input vector.
[in] nCols: Number of columns in each matrix of the input vector.
Each row in the output matrix is the concatenation of the same row in each of the input matrices. Passing an std::vector to a CUDA kernel is a non-trivial task that requires manually allocating and copying to device memory - details in comments within the function's body. Launches one thread per output element.
Definition at line 592 of file Propagation.cu.
Hadamard()
In-place Hadamard (element-wise) product of matrices A and B, with the result being written into A.
Im2col()
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
A helper for image operations that rearranges image regions into column vectors.
Parameters:
[out] A: The output matrix. Each row corresponds to a receptive field.
[in] B: The input matrix. Each row corresponds to a row in the image view.
[in] imgHeight: The height of the input.
[in] imgWidth: The width of the input.
[in] fltHeight: Height of the kernel.
[in] fltWidth: Width of the kernel.
[in] strideRows: Stride size in the horizontal dimension.
[in] strideCols: Stride size in the vertical dimension.
[in] zeroPaddingHeight: The padding in the horizontal dimension.
[in] zeroPaddingWidth: The padding in the vertical dimension.
This transformation allows us to express a 2D convolution as a matrix multiplication. We can therefore harness the finely tuned GEMM implementation of cuBLAS to achieve maximum performance. This function can greatly speed up propagation in TConvLayer.
Definition at line 183 of file Propagation.cu.
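For illustration, a host-side reference version of the same rearrangement for a single-channel image (this is not the CUDA kernel; parameter names follow the signature above, and the function itself is a hypothetical helper):

    #include <cstddef>
    #include <vector>

    // Unroll receptive fields of a single-channel image into rows.
    std::vector<float> Im2colRef(const std::vector<float> &img,
                                 std::size_t imgHeight, std::size_t imgWidth,
                                 std::size_t fltHeight, std::size_t fltWidth,
                                 std::size_t strideRows, std::size_t strideCols,
                                 std::size_t zeroPaddingHeight, std::size_t zeroPaddingWidth)
    {
       std::size_t outH = (imgHeight - fltHeight + 2 * zeroPaddingHeight) / strideRows + 1;
       std::size_t outW = (imgWidth  - fltWidth  + 2 * zeroPaddingWidth)  / strideCols + 1;
       std::vector<float> A(outH * outW * fltHeight * fltWidth, 0.f); // zero padding by default
       for (std::size_t view = 0; view < outH * outW; ++view) {
          long r0 = static_cast<long>(view / outW * strideRows) - static_cast<long>(zeroPaddingHeight);
          long c0 = static_cast<long>(view % outW * strideCols) - static_cast<long>(zeroPaddingWidth);
          for (std::size_t fr = 0; fr < fltHeight; ++fr)
             for (std::size_t fc = 0; fc < fltWidth; ++fc) {
                long r = r0 + static_cast<long>(fr);
                long c = c0 + static_cast<long>(fc);
                if (r >= 0 && r < static_cast<long>(imgHeight) &&
                    c >= 0 && c < static_cast<long>(imgWidth))
                   A[view * fltHeight * fltWidth + fr * fltWidth + fc] = img[r * imgWidth + c];
             }
       }
       return A; // one row (of length fltHeight*fltWidth) per receptive field
    }

A single GEMM of the rotated filter matrix against this output then evaluates the convolution for all receptive fields at once, which is the design rationale stated above.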
InitializeGlorotNormal()
Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled from a normal distribution with stddev = sqrt(2/(N_input + N_output)), and values larger than 2 * stddev are discarded. See Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf.
Definition at line 90 of file Initialization.cu.
InitializeGlorotUniform()
Sample from a uniform distribution in the range [-lim, +lim], where lim = sqrt(6/(N_in + N_out)).
This initialization is also called Xavier uniform; see Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
Definition at line 119 of file Initialization.cu.
MaxPoolLayerBackward()
Perform the complete backward propagation step in a Pooling Layer.
Based on the winning indices stored in the index matrix, it simply forwards the activation gradients to the previous layer.
Definition at line 499 of file Propagation.cu.
Multiply()
Standard multiplication of two matrices A and B, with the result being written into C.
void TMVA::DNN::TCuda<double>::Multiply(TCudaMatrix<double> &C, const TCudaMatrix<double> &A, const TCudaMatrix<double> &B)
Definition at line 55 of file Arithmetic.cu.
void TMVA::DNN::TCuda<float>::Multiply(TCudaMatrix<float> &C, const TCudaMatrix<float> &A, const TCudaMatrix<float> &B)
Definition at line 28 of file Arithmetic.cu.
MultiplyTranspose()
Matrix-multiply input with the transpose of weights and write the results into output.
void TMVA::DNN::TCuda<double>::MultiplyTranspose(TCudaMatrix<double> &output, const TCudaMatrix<double> &input, const TCudaMatrix<double> &Weights)
Definition at line 52 of file Propagation.cu.
void TMVA::DNN::TCuda<float>::MultiplyTranspose(TCudaMatrix<float> &output, const TCudaMatrix<float> &input, const TCudaMatrix<float> &Weights)
Definition at line 28 of file Propagation.cu.
Rearrange()
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
Definition at line 548 of file Propagation.cu.
ReciprocalElementWise()
Take the reciprocal of each element of the matrix A and write the result into A.
Definition at line 385 of file Arithmetic.cu.
RecurrentLayerBackward()
Backward pass for Recurrent Networks.
Definition at line 29 of file RecurrentPropagation.cu.
Reshape()
Transform the matrix B to a matrix with different dimensions A.
Definition at line 535 of file Propagation.cu.
RotateWeights()
Rotates the matrix B, which represents the weights, and stores it in the matrix A.
Definition at line 207 of file Propagation.cu.
ScaleAdd()
Adds the elements in matrix B, scaled by the given constant factor, to the elements in matrix A.
This is required for the weight update in the gradient descent step.
void TMVA::DNN::TCuda<double>::ScaleAdd(TCudaMatrix<double> &B, const TCudaMatrix<double> &A, double alpha)
Definition at line 330 of file Arithmetic.cu.
void TMVA::DNN::TCuda<float>::ScaleAdd(TCudaMatrix<float> &B, const TCudaMatrix<float> &A, float alpha)
Definition at line 317 of file Arithmetic.cu.
ScaleAdd() [Tensor_t overload]
Above functions extended to vectors.
SoftmaxCrossEntropy()
Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net.
Definition at line 108 of file LossFunctions.cu.
SqrtElementWise()
Take the square root of each element of the matrix A and write the result into A.
Definition at line 411 of file Arithmetic.cu.
SquareElementWise()
Square each element of the matrix A and write the result into A.
Definition at line 398 of file Arithmetic.cu.
Sum()
Compute the sum of all elements in A.
Definition at line 172 of file Arithmetic.cu.
SumColumns()
Sum columns of the (m x n) matrix A and write the results into the first m elements of B.
void TMVA::DNN::TCuda<double>::SumColumns(TCudaMatrix<double> &B, const TCudaMatrix<double> &A, double alpha, double beta)
Definition at line 213 of file Arithmetic.cu.
void TMVA::DNN::TCuda<float>::SumColumns(TCudaMatrix<float> &B, const TCudaMatrix<float> &A, float alpha, float beta)
Definition at line 189 of file Arithmetic.cu.
SumRows()
Extra functions defined only for the CPU architecture.
Sum rows of the (m x n) matrix A and write the results into the first m elements in B.
void TMVA::DNN::TCuda<double>::SumRows(TCudaMatrix<double> &B, const TCudaMatrix<double> &A)
Definition at line 259 of file Arithmetic.cu.
void TMVA::DNN::TCuda<float>::SumRows(TCudaMatrix<float> &B, const TCudaMatrix<float> &A)
Definition at line 236 of file Arithmetic.cu.
TransposeMultiply()
Matrix multiplication of two matrices A and B^T (transposed), with the result being written into C.
void TMVA::DNN::TCuda<double>::TransposeMultiply(TCudaMatrix<double> &C, const TCudaMatrix<double> &A, const TCudaMatrix<double> &B, double alpha, double beta)
Definition at line 109 of file Arithmetic.cu.
void TMVA::DNN::TCuda<float>::TransposeMultiply(TCudaMatrix<float> &C, const TCudaMatrix<float> &A, const TCudaMatrix<float> &B, float alpha, float beta)
Definition at line 82 of file Arithmetic.cu.