| 
| static void  | ConvLayerForward (Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &) | 
|   | Forward propagation in the Convolutional layer. |
| static Tensor_t  | CreateTensor (DeviceBuffer_t buffer, size_t b, size_t t, size_t w) | 
|   | 
| static Tensor_t  | CreateTensor (DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) | 
|   | 
| static Tensor_t  | CreateTensor (size_t b, size_t t, size_t w) | 
|   | 
| static Tensor_t  | CreateTensor (size_t n, size_t c, size_t h, size_t w) | 
|   | 
| static void  | CreateWeightTensors (std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights) | 
|   | 
| static void  | FreeConvWorkspace (TWorkspace *&) | 
|   | Only used for certain cuDNN on-device memory. |
| static void  | FreePoolDropoutWorkspace (TWorkspace *&) | 
|   | 
| static void  | FreeRNNWorkspace (TWorkspace *&) | 
|   | 
| static TMVA::Experimental::MemoryLayout  | GetTensorLayout () | 
|   | 
| static void  | InitializeActivationDescriptor (ActivationDescriptor_t &, EActivationFunction, double=0.0) | 
|   | 
| static void  | InitializeBNormDescriptors (TDescriptors *&, BNormLayer_t *) | 
|   | Initialize CNN data/operator descriptors. |
| static void  | InitializeConvDescriptors (TDescriptors *&, ConvLayer_t *) | 
|   | 
| static void  | InitializeConvWorkspace (TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *) | 
|   | 
| static void  | InitializeGRUDescriptors (TDescriptors *&, GenLayer_t *) | 
|   | 
| static void  | InitializeGRUTensors (GenLayer_t *) | 
|   | 
| static void  | InitializeGRUWorkspace (TWorkspace *&, TDescriptors *&, GenLayer_t *) | 
|   | 
| static void  | InitializeLSTMDescriptors (TDescriptors *&, GenLayer_t *) | 
|   | 
| static void  | InitializeLSTMTensors (GenLayer_t *) | 
|   | 
| static void  | InitializeLSTMWorkspace (TWorkspace *&, TDescriptors *&, GenLayer_t *) | 
|   | 
| static void  | InitializePoolDescriptors (TDescriptors *&, PoolingLayer_t *) | 
|   | 
| static void  | InitializePoolDropoutWorkspace (TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *) | 
|   | 
| static void  | InitializeRNNDescriptors (TDescriptors *&, GenLayer_t *) | 
|   | 
| static void  | InitializeRNNTensors (GenLayer_t *) | 
|   | 
| static void  | InitializeRNNWorkspace (TWorkspace *&, TDescriptors *&, GenLayer_t *) | 
|   | 
| static bool  | IsCudnn () | 
|   | 
| static void  | PrepareInternals (Tensor_t &) | 
|   | Dummy placeholder - preparation is currently only required for the CUDA architecture. |
| static void  | ReleaseBNormDescriptors (TDescriptors *&) | 
|   | 
| static void  | ReleaseConvDescriptors (TDescriptors *&) | 
|   | Release CNN data/operator descriptors. |
| static void  | ReleaseDescriptor (ActivationDescriptor_t &) | 
|   | 
| static void  | ReleasePoolDescriptors (TDescriptors *&) | 
|   | 
| static void  | ReleaseRNNDescriptors (TDescriptors *&) | 
|   | 
 | 
Low-level functions required for the forward propagation of activations through the network.  
 | 
| static void  | MultiplyTranspose (Matrix_t &output, const Matrix_t &input, const Matrix_t &weights) | 
|   | Matrix-multiply input with the transpose of weights and write the results into output. |
| static void  | MultiplyTranspose (Tensor_t &output, const Tensor_t &input, const Matrix_t &weights) | 
|   | 
| static void  | AddRowWise (Matrix_t &output, const Matrix_t &biases) | 
|   | Add the vector biases row-wise to the matrix output. |
| static void  | AddRowWise (Tensor_t &output, const Matrix_t &biases) | 
|   | 
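
Taken together, these two calls implement the dense-layer forward step, output = input · weights^T + biases. A minimal sketch, assuming the CPU backend TCpu<Double_t> as Architecture_t (the wrapper function is illustrative, not part of this interface):

```cpp
#include "TMVA/DNN/Architectures/Cpu.h"

using Architecture_t = TMVA::DNN::TCpu<Double_t>;
using Matrix_t       = Architecture_t::Matrix_t;

// Dense forward pass: output <- input * weights^T + biases (row-wise).
void DenseForward(Matrix_t &output, const Matrix_t &input,
                  const Matrix_t &weights, const Matrix_t &biases)
{
   Architecture_t::MultiplyTranspose(output, input, weights); // output <- input * weights^T
   Architecture_t::AddRowWise(output, biases);                // add biases to every row
}
```
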
 | 
Low-level functions required for the backward propagation of gradients through the network.  
 | 
| static void  | Backward (Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward) | 
|   | Perform the complete backward propagation step. |
| static void  | ScaleAdd (Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0) | 
|   | Adds the elements in matrix B, scaled by beta, to the elements in matrix A. |
| static void  | Copy (Matrix_t &B, const Matrix_t &A) | 
|   | 
| template<typename AMatrix_t >  | 
| static void  | CopyDiffArch (Matrix_t &B, const AMatrix_t &A) | 
|   | 
| static void  | ScaleAdd (Tensor_t &A, const Tensor_t &B, Scalar_t beta=1.0) | 
|   | Above functions extended to tensors. |
| static void  | Copy (Tensor_t &A, const Tensor_t &B) | 
|   | 
| template<typename ATensor_t >  | 
| static void  | CopyDiffArch (Tensor_t &A, const ATensor_t &B) | 
|   | 
| template<typename AMatrix_t >  | 
| static void  | CopyDiffArch (std::vector< Matrix_t > &A, const std::vector< AMatrix_t > &B) | 
|   | 
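
In standard backprop notation, Backward combines the stored activation derivatives df = f'(Z) with the incoming gradients dY (activationGradients) and the layer input X (activationBackward). A sketch of the usual dense-layer equations (the exact in-place ordering used by a given backend is an implementation detail):

```latex
\delta = dY \odot f'(Z), \qquad
dX = \delta\, W, \qquad
\nabla_W = \delta^{\top} X, \qquad
\nabla_b = \sum_{\text{batch rows}} \delta
```
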
 | 
For each activation function, the low-level interface contains two routines. 
One that applies the activation function to a matrix and one that evaluates the derivatives of the activation function at the elements of a given matrix and writes the results into the result matrix.  
 | 
| static void  | ActivationFunctionForward (Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const Scalar_t alpha=1, const Scalar_t beta=0) | 
|   | 
| static void  | ActivationFunctionBackward (Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const Scalar_t alpha=1, const Scalar_t beta=0) | 
|   | Computes the gradient of the activation function. |
| static void  | IdentityDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | Relu (Tensor_t &B) | 
|   | 
| static void  | ReluDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | Sigmoid (Tensor_t &B) | 
|   | 
| static void  | SigmoidDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | Tanh (Tensor_t &B) | 
|   | 
| static void  | TanhDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | FastTanh (Tensor_t &B) | 
|   | 
| static void  | FastTanhDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | SymmetricRelu (Tensor_t &B) | 
|   | 
| static void  | SymmetricReluDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | SoftSign (Tensor_t &B) | 
|   | 
| static void  | SoftSignDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
| static void  | Gauss (Tensor_t &B) | 
|   | 
| static void  | GaussDerivative (Tensor_t &B, const Tensor_t &A) | 
|   | 
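
A minimal usage sketch for the element-wise routines above, assuming the CPU backend and the CreateTensor/InitializeZero helpers from this interface (shapes and the zero-filled input are illustrative):

```cpp
#include "TMVA/DNN/Architectures/Cpu.h"

using Architecture_t = TMVA::DNN::TCpu<Double_t>;
using Tensor_t       = Architecture_t::Tensor_t;

void ReluExample()
{
   const size_t batchSize = 4, nFeatures = 8;  // illustrative sizes
   Tensor_t Z = Architecture_t::CreateTensor(batchSize, 1, nFeatures);
   Architecture_t::InitializeZero(Z);          // stand-in for real pre-activations

   Tensor_t dF = Architecture_t::CreateTensor(batchSize, 1, nFeatures);
   Architecture_t::ReluDerivative(dF, Z); // dF <- f'(Z), evaluated before Z is overwritten
   Architecture_t::Relu(Z);               // in place: Z <- max(0, Z)
}
```
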
 | 
Loss functions compute a scalar value, given the output of the network for a given training input and the expected network prediction Y, that quantifies the quality of the prediction. 
For each loss function, a routine that computes the gradients (suffixed by Gradients) must also be provided to start the backpropagation algorithm.  
 | 
| static Scalar_t  | MeanSquaredError (const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights) | 
|   | 
| static void  | MeanSquaredErrorGradients (Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights) | 
|   | 
| static Scalar_t  | CrossEntropy (const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights) | 
|   | Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net. |
| static void  | CrossEntropyGradients (Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights) | 
|   | 
| static Scalar_t  | SoftmaxCrossEntropy (const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights) | 
|   | Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer in the net. |
| static void  | SoftmaxCrossEntropyGradients (Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights) | 
|   | 
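
A minimal sketch combining a loss with its gradient routine, as used to seed backpropagation (the wrapper function is illustrative):

```cpp
#include "TMVA/DNN/Architectures/Cpu.h"

using Architecture_t = TMVA::DNN::TCpu<Double_t>;
using Matrix_t       = Architecture_t::Matrix_t;
using Scalar_t       = Architecture_t::Scalar_t;

// Evaluate the mean squared error and fill dY with dL/d(output).
Scalar_t LossAndGradient(Matrix_t &dY, const Matrix_t &Y,
                         const Matrix_t &output, const Matrix_t &weights)
{
   Scalar_t loss = Architecture_t::MeanSquaredError(Y, output, weights);
   Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights);
   return loss;
}
```
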
 | 
Output functions transform the activation output of the last layer of the network into a valid prediction YHat for the desired usage of the network, e.g. the identity function for regression or the sigmoid transformation for two-class classification.  
 | 
| static void  | Sigmoid (Matrix_t &YHat, const Matrix_t &) | 
|   | 
| static void  | Softmax (Matrix_t &YHat, const Matrix_t &) | 
|   | 
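
For reference, Softmax computes the usual normalized exponentials over each row of the linear activations A:

```latex
\hat{Y}_{ij} = \frac{e^{A_{ij}}}{\sum_{k} e^{A_{ik}}}
```
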
 | 
For each regularization type, two functions are required: one named <Type>Regularization, which evaluates the corresponding regularization functional for a given weight matrix, and Add<Type>RegularizationGradients, which adds the regularization component of the gradients to the provided matrix.  
 | 
| static Scalar_t  | L1Regularization (const Matrix_t &W) | 
|   | 
| static void  | AddL1RegularizationGradients (Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay) | 
|   | 
| static Scalar_t  | L2Regularization (const Matrix_t &W) | 
|   | 
| static void  | AddL2RegularizationGradients (Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay) | 
|   | 
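
A sketch of how the two pieces are typically combined during training; scaling the penalty by the weight-decay factor in the total loss is an assumption about the caller, not part of this interface:

```cpp
#include "TMVA/DNN/Architectures/Cpu.h"

using Architecture_t = TMVA::DNN::TCpu<Double_t>;
using Matrix_t       = Architecture_t::Matrix_t;
using Scalar_t       = Architecture_t::Scalar_t;

// Add the L2 contribution to the weight gradients and return the loss term.
Scalar_t ApplyL2(Matrix_t &dW, const Matrix_t &W, Scalar_t weightDecay)
{
   Architecture_t::AddL2RegularizationGradients(dW, W, weightDecay);
   return weightDecay * Architecture_t::L2Regularization(W);
}
```
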
 | 
For each initialization method, one function in the low-level interface is provided. 
The naming scheme is Initialize<Type> for a given initialization method Type.  
 | 
| static void  | InitializeGauss (Matrix_t &A) | 
|   | 
| static void  | InitializeUniform (Matrix_t &A) | 
|   | 
| static void  | InitializeIdentity (Matrix_t &A) | 
|   | 
| static void  | InitializeZero (Matrix_t &A) | 
|   | 
| static void  | InitializeZero (Tensor_t &A) | 
|   | 
| static void  | InitializeGlorotNormal (Matrix_t &A) | 
|   | Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled from a normal distribution with stddev = sqrt(2/(N_input + N_output)), and values larger than 2 * stddev are discarded. See Glorot & Bengio, AISTATS 2010 - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf. |
| static void  | InitializeGlorotUniform (Matrix_t &A) | 
|   | Sample from a uniform distribution in range [-lim, +lim] where lim = sqrt(6/(N_in + N_out)). |
| static TRandom &  | GetRandomGenerator () | 
|   | 
| static void  | SetRandomSeed (size_t seed) | 
|   | 
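
The two Glorot schemes written out (standard formulas from the cited paper):

```latex
\sigma_{\text{normal}} = \sqrt{\frac{2}{N_{\text{in}} + N_{\text{out}}}}, \qquad
\mathrm{lim}_{\text{uniform}} = \sqrt{\frac{6}{N_{\text{in}} + N_{\text{out}}}}, \qquad
W_{ij} \sim \mathcal{U}(-\mathrm{lim}, +\mathrm{lim})
```
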
 | 
| static void  | DropoutForward (Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p) | 
|   | Apply dropout with activation probability p to the given tensor A and scale the result by the reciprocal of p. |
| static void  | DropoutForward (Matrix_t &A, Scalar_t p) | 
|   | 
| static void  | DropoutBackward (Tensor_t &, TDescriptors *, TWorkspace *) | 
|   | 
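
This is the usual inverted-dropout convention: each unit is kept with probability p and scaled by 1/p, so the expected activation is unchanged and no rescaling is needed at inference time:

```latex
A_i \leftarrow
\begin{cases}
A_i / p & \text{with probability } p \\
0       & \text{with probability } 1 - p
\end{cases},
\qquad
\mathbb{E}\!\left[A_i^{\text{dropout}}\right] = A_i
```
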
 | 
| static void  | BatchNormLayerForwardTraining (int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor) | 
|   | The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled by two parameters, different for each input variable (the transformation is written out after this group). |
| static void  | BatchNormLayerForwardInference (int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &) | 
|   | During inference the inputs are not normalized using the batch mean but the previously computed running mean and variance. |
| static void  | BatchNormLayerBackward (int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &) | 
|   | 
| static Tensor_t  | BatchNormLayerReshapeTensor (int axis, const Tensor_t &x) | 
|   | 
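
The training-time transformation and running-statistics update in the usual batch-norm notation (which of the two statistics the momentum factor weights is an assumption; conventions differ between backends):

```latex
\hat{x} = \frac{x - \mu_{\text{batch}}}{\sqrt{\sigma^2_{\text{batch}} + \epsilon}}, \qquad
y = \gamma\,\hat{x} + \beta, \qquad
\mu_{\text{run}} \leftarrow \text{momentum}\cdot\mu_{\text{run}} + (1 - \text{momentum})\cdot\mu_{\text{batch}}
```
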
 | 
| static size_t  | calculateDimension (size_t imgDim, size_t fltDim, size_t padding, size_t stride) | 
|   | Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters (the formula is written out after this group). |
| static void  | Im2col (Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth) | 
|   | Transform the matrix B into local-view format, suitable for convolution, and store it in matrix A. |
| static void  | Im2colIndices (std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth) | 
|   | 
| static void  | Im2colFast (Matrix_t &A, const Matrix_t &B, const std::vector< int > &V) | 
|   | 
| static void  | RotateWeights (Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters) | 
|   | Rotates the matrix B, which represents the weights, and stores the result in the matrix A. |
| static void  | AddConvBiases (Matrix_t &output, const Matrix_t &biases) | 
|   | Add the biases in the Convolutional Layer. |
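
The output size computed by calculateDimension is the standard one-dimensional convolution formula:

```latex
\text{outputDim} = \left\lfloor \frac{\text{imgDim} - \text{fltDim} + 2\cdot\text{padding}}{\text{stride}} \right\rfloor + 1
```

Conceptually, the helpers above implement convolution as a single matrix product: Im2col unrolls each local receptive field of the input into a row of A, one multiplication then applies all filters at once, and AddConvBiases adds the per-filter offsets. As a sketch (the exact operand ordering in this backend is an assumption):

```latex
\text{output} = W \cdot \text{im2col}(X)^{\top} + b
```
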
 | 
| static void  | ConvLayerBackward (Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews) | 
|   | Perform the complete backward propagation step in a Convolutional Layer. |
| static void  | CalculateConvActivationGradients (Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth) | 
|   | Utility function for calculating the activation gradients of the layer before the convolutional layer. |
| static void  | CalculateConvWeightGradients (Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews) | 
|   | Utility function for calculating the weight gradients of the convolutional layer. |
| static void  | CalculateConvBiasGradients (Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews) | 
|   | Utility function for calculating the bias gradients of the convolutional layer. |
 | 
| static void  | Downsample (Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols) | 
|   | Downsample the matrix C to the matrix A, using the max operation, such that the winning indices are stored in matrix B. |
 | 
| static void  | MaxPoolLayerBackward (Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews) | 
|   | Perform the complete backward propagation step in a Pooling Layer. |
| static Matrix_t &  | RecurrentLayerBackward (Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient) | 
|   | Backward pass for Recurrent Networks. |
| static void  | RNNForward (const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool) | 
|   | 
| static void  | RNNBackward (const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &) | 
|   | 
| static Matrix_t &  | LSTMLayerBackward (TCpuMatrix< Scalar_t > &state_gradients_backward, TCpuMatrix< Scalar_t > &cell_gradients_backward, TCpuMatrix< Scalar_t > &input_weight_gradients, TCpuMatrix< Scalar_t > &forget_weight_gradients, TCpuMatrix< Scalar_t > &candidate_weight_gradients, TCpuMatrix< Scalar_t > &output_weight_gradients, TCpuMatrix< Scalar_t > &input_state_weight_gradients, TCpuMatrix< Scalar_t > &forget_state_weight_gradients, TCpuMatrix< Scalar_t > &candidate_state_weight_gradients, TCpuMatrix< Scalar_t > &output_state_weight_gradients, TCpuMatrix< Scalar_t > &input_bias_gradients, TCpuMatrix< Scalar_t > &forget_bias_gradients, TCpuMatrix< Scalar_t > &candidate_bias_gradients, TCpuMatrix< Scalar_t > &output_bias_gradients, TCpuMatrix< Scalar_t > &di, TCpuMatrix< Scalar_t > &df, TCpuMatrix< Scalar_t > &dc, TCpuMatrix< Scalar_t > &dout, const TCpuMatrix< Scalar_t > &precStateActivations, const TCpuMatrix< Scalar_t > &precCellActivations, const TCpuMatrix< Scalar_t > &fInput, const TCpuMatrix< Scalar_t > &fForget, const TCpuMatrix< Scalar_t > &fCandidate, const TCpuMatrix< Scalar_t > &fOutput, const TCpuMatrix< Scalar_t > &weights_input, const TCpuMatrix< Scalar_t > &weights_forget, const TCpuMatrix< Scalar_t > &weights_candidate, const TCpuMatrix< Scalar_t > &weights_output, const TCpuMatrix< Scalar_t > &weights_input_state, const TCpuMatrix< Scalar_t > &weights_forget_state, const TCpuMatrix< Scalar_t > &weights_candidate_state, const TCpuMatrix< Scalar_t > &weights_output_state, const TCpuMatrix< Scalar_t > &input, TCpuMatrix< Scalar_t > &input_gradient, TCpuMatrix< Scalar_t > &cell_gradient, TCpuMatrix< Scalar_t > &cell_tanh) | 
|   | Backward pass for LSTM Network. |
| static Matrix_t &  | GRULayerBackward (TCpuMatrix< Scalar_t > &state_gradients_backward, TCpuMatrix< Scalar_t > &reset_weight_gradients, TCpuMatrix< Scalar_t > &update_weight_gradients, TCpuMatrix< Scalar_t > &candidate_weight_gradients, TCpuMatrix< Scalar_t > &reset_state_weight_gradients, TCpuMatrix< Scalar_t > &update_state_weight_gradients, TCpuMatrix< Scalar_t > &candidate_state_weight_gradients, TCpuMatrix< Scalar_t > &reset_bias_gradients, TCpuMatrix< Scalar_t > &update_bias_gradients, TCpuMatrix< Scalar_t > &candidate_bias_gradients, TCpuMatrix< Scalar_t > &dr, TCpuMatrix< Scalar_t > &du, TCpuMatrix< Scalar_t > &dc, const TCpuMatrix< Scalar_t > &precStateActivations, const TCpuMatrix< Scalar_t > &fReset, const TCpuMatrix< Scalar_t > &fUpdate, const TCpuMatrix< Scalar_t > &fCandidate, const TCpuMatrix< Scalar_t > &weights_reset, const TCpuMatrix< Scalar_t > &weights_update, const TCpuMatrix< Scalar_t > &weights_candidate, const TCpuMatrix< Scalar_t > &weights_reset_state, const TCpuMatrix< Scalar_t > &weights_update_state, const TCpuMatrix< Scalar_t > &weights_candidate_state, const TCpuMatrix< Scalar_t > &input, TCpuMatrix< Scalar_t > &input_gradient, bool resetGateAfter) | 
|   | Backward pass for GRU Network. |
 | 
| static void  | Reshape (Matrix_t &A, const Matrix_t &B) | 
|   | Transform the matrix B into the matrix A, which has different dimensions. |
| static void  | Flatten (Tensor_t &A, const Tensor_t &B) | 
|   | Flattens the tensor B, such that each matrix is stretched into one row, resulting in the matrix A. |
| static void  | Deflatten (Tensor_t &A, const Tensor_t &B) | 
|   | Transforms each row of B to a matrix and stores it in the tensor A. |
| static void  | Rearrange (Tensor_t &out, const Tensor_t &in) | 
|   | Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in. |
 | 
Additional arithmetic on CPU matrices used to implement the low-level interface.  
 | 
| static void  | Multiply (Matrix_t &C, const Matrix_t &A, const Matrix_t &B) | 
|   | Standard multiplication of two matrices A and B with the result being written into C. |
| static void  | TransposeMultiply (Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.) | 
|   | Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C. |
| static void  | Hadamard (Tensor_t &A, const Tensor_t &B) | 
|   | In-place Hadamard (element-wise) product of matrices A and B with the result being written into A. |
| static void  | Hadamard (Matrix_t &A, const Matrix_t &B) | 
|   | 
| static void  | SumColumns (Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.) | 
|   | Sum columns of the (m x n) matrix A and write the results into the first m elements of B. |
| static Scalar_t  | Sum (const Matrix_t &A) | 
|   | Compute the sum of all elements in A. |
| static bool  | AlmostEquals (const Matrix_t &A, const Matrix_t &B, double epsilon=0.1) | 
|   | Check two matrices for equality, taking floating point arithmetic errors into account. |
| static void  | ConstAdd (Matrix_t &A, Scalar_t beta) | 
|   | Add the constant beta to all the elements of matrix A and write the result into A. |
| static void  | ConstMult (Matrix_t &A, Scalar_t beta) | 
|   | Multiply all the elements of matrix A by the constant beta and write the result into A. |
| static void  | ReciprocalElementWise (Matrix_t &A) | 
|   | Take the reciprocal of each element of the matrix A and write the result into A. |
| static void  | SquareElementWise (Matrix_t &A) | 
|   | Square each element of the matrix A and write the result into A. |
| static void  | SqrtElementWise (Matrix_t &A) | 
|   | Take the square root of each element of the matrix A and write the result into A. |
| static void  | AdamUpdate (Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps) | 
|   | Adam updates. |
| static void  | AdamUpdateFirstMom (Matrix_t &A, const Matrix_t &B, Scalar_t beta) | 
|   | 
| static void  | AdamUpdateSecondMom (Matrix_t &A, const Matrix_t &B, Scalar_t beta) | 
|   | 
| static void  | PrintTensor (const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false) | 
|   |
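
How the three Adam routines map onto the standard update, with g the current gradient, M and V the first and second moment estimates, and alpha the (bias-corrected) learning rate; the exact bias-correction handling inside this backend is an assumption:

```latex
M \leftarrow \beta_1 M + (1 - \beta_1)\, g, \qquad
V \leftarrow \beta_2 V + (1 - \beta_2)\, g^2, \qquad
A \leftarrow A - \alpha\, \frac{M}{\sqrt{V} + \epsilon}
```
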