26#include "cuda_runtime.h"
35template <
typename AFloat>
38 cudaFreeHost(*devicePointer);
39 delete[] devicePointer;
43template <
typename AFloat>
46 AFloat **pointer =
new AFloat *[1];
47 cudaMallocHost(pointer,
size *
sizeof(AFloat));
52template <
typename AFloat>
59template <
typename AFloat>
69template <
typename AFloat>
78template <
typename AFloat>
81 cudaFree(*devicePointer);
82 delete[] devicePointer;
86template <
typename AFloat>
89 AFloat **pointer =
new AFloat *[1];
90 cudaMalloc(pointer,
size *
sizeof(AFloat));
96template <
typename AFloat>
100 AFloat **pointer =
new AFloat *[1];
101 cudaMalloc(pointer,
size *
sizeof(AFloat));
106template <
typename AFloat>
110 AFloat **pointer =
new AFloat *[1];
111 *pointer = devicePointer;
116template <
typename AFloat>
126template <
typename AFloat>
133template <
typename AFloat>
141template <
typename AFloat>
156 for (
size_t i = 0; i < batchSize; i++) {
157 size_t sampleIndex = *sampleIterator;
158 for (
size_t j = 0; j <
n; j++) {
159 size_t bufferIndex = j * batchSize + i;
160 buffer[bufferIndex] =
static_cast<float>(
inputMatrix(sampleIndex, j));
174 for (
size_t i = 0; i < batchSize; i++) {
175 size_t sampleIndex = *sampleIterator;
176 for (
size_t j = 0; j <
n; j++) {
177 size_t bufferIndex = j * batchSize + i;
178 buffer[bufferIndex] =
static_cast<float>(
outputMatrix(sampleIndex, j));
190 for (
size_t i = 0; i < batchSize; i++) {
191 buffer[i] =
static_cast<float>(
weightMatrix(*sampleIterator, 0));
202 size_t n =
event->GetNVariables();
203 for (
size_t i = 0; i < batchSize; i++) {
204 size_t sampleIndex = * sampleIterator++;
205 event = std::get<0>(
fData)[sampleIndex];
206 for (
size_t j = 0; j <
n; j++) {
207 size_t bufferIndex = j * batchSize + i;
208 buffer[bufferIndex] =
static_cast<float>(
event->GetValue(j));
219 size_t n = buffer.
GetSize() / batchSize;
223 for (
size_t i = 0; i < batchSize; i++) {
224 size_t sampleIndex = *sampleIterator++;
225 Event *
event = std::get<0>(
fData)[sampleIndex];
226 for (
size_t j = 0; j <
n; j++) {
228 size_t bufferIndex = j * batchSize + i;
230 if (event->GetNTargets() == 0) {
233 buffer[bufferIndex] = (info.
IsSignal(event)) ? 1.0 : 0.0;
236 buffer[bufferIndex] = 0.0;
237 if (j == event->GetClass()) {
238 buffer[bufferIndex] = 1.0;
242 buffer[bufferIndex] =
static_cast<float>(
event->GetTarget(j));
253 for (
size_t i = 0; i < batchSize; i++) {
254 size_t sampleIndex = *sampleIterator++;
255 Event *
event = std::get<0>(
fData)[sampleIndex];
256 buffer[i] =
static_cast<float>(
event->GetWeight());
268 for (
size_t i = 0; i < batchSize; i++) {
269 size_t sampleIndex = *sampleIterator;
270 for (
size_t j = 0; j <
n; j++) {
271 size_t bufferIndex = j * batchSize + i;
286 for (
size_t i = 0; i < batchSize; i++) {
287 size_t sampleIndex = *sampleIterator;
288 for (
size_t j = 0; j <
n; j++) {
289 size_t bufferIndex = j * batchSize + i;
302 for (
size_t i = 0; i < batchSize; i++) {
303 buffer[i] =
static_cast<double>(
weightMatrix(*sampleIterator, 0));
314 size_t n =
event->GetNVariables();
315 for (
size_t i = 0; i < batchSize; i++) {
316 size_t sampleIndex = * sampleIterator++;
317 event = std::get<0>(
fData)[sampleIndex];
318 for (
size_t j = 0; j <
n; j++) {
319 size_t bufferIndex = j * batchSize + i;
320 buffer[bufferIndex] =
event->GetValue(j);
331 size_t n = buffer.
GetSize() / batchSize;
335 for (
size_t i = 0; i < batchSize; i++) {
336 size_t sampleIndex = *sampleIterator++;
337 Event *
event = std::get<0>(
fData)[sampleIndex];
338 for (
size_t j = 0; j <
n; j++) {
340 size_t bufferIndex = j * batchSize + i;
342 if (event->GetNTargets() == 0) {
345 buffer[bufferIndex] = (info.
IsSignal(event)) ? 1.0 : 0.0;
348 buffer[bufferIndex] = 0.0;
349 if (j == event->GetClass()) {
350 buffer[bufferIndex] = 1.0;
354 buffer[bufferIndex] =
event->GetTarget(j);
365 for (
size_t i = 0; i < batchSize; i++) {
366 size_t sampleIndex = *sampleIterator++;
367 Event *
event = std::get<0>(
fData)[sampleIndex];
368 buffer[i] =
static_cast<double>(
event->GetWeight());
381 size_t sampleIndex = *sampleIterator;
384 buffer[bufferIndex] =
static_cast<float>(
inputTensor[0](sampleIndex, j));
390 size_t sampleIndex = *sampleIterator;
394 buffer[bufferIndex] =
static_cast<float>(
inputTensor[sampleIndex](j, k));
411 size_t sampleIndex = *sampleIterator;
412 for (
size_t j = 0; j <
n; j++) {
414 buffer[bufferIndex] =
static_cast<float>(
outputMatrix(sampleIndex, j));
427 buffer[i] =
static_cast<float>(
weightMatrix(*sampleIterator, 0));
441 size_t sampleIndex = *sampleIterator;
442 Event *
event = std::get<0>(
fData)[sampleIndex];
445 buffer[bufferIndex] =
event->GetValue(j);
452 size_t sampleIndex = *sampleIterator;
453 Event *
event = std::get<0>(
fData)[sampleIndex];
458 buffer[bufferIndex] =
event->GetValue(j *
fBatchWidth + k);
466 Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
481 size_t sampleIndex = *sampleIterator++;
482 Event *
event = std::get<0>(
fData)[sampleIndex];
483 for (
size_t j = 0; j <
n; j++) {
487 if (event->GetNTargets() == 0) {
490 buffer[bufferIndex] = (info.
IsSignal(event)) ? 1.0 : 0.0;
493 buffer[bufferIndex] = 0.0;
494 if (j == event->GetClass()) {
495 buffer[bufferIndex] = 1.0;
499 buffer[bufferIndex] =
static_cast<Float_t>(
event->GetTarget(j));
511 size_t sampleIndex = *sampleIterator++;
512 Event *
event = std::get<0>(
fData)[sampleIndex];
513 buffer[i] =
event->GetWeight();
526 size_t sampleIndex = *sampleIterator;
529 buffer[bufferIndex] =
static_cast<float>(
inputTensor[0](sampleIndex, j));
535 size_t sampleIndex = *sampleIterator;
539 buffer[bufferIndex] =
static_cast<float>(
inputTensor[sampleIndex](j, k));
556 size_t sampleIndex = *sampleIterator;
557 for (
size_t j = 0; j <
n; j++) {
587 size_t sampleIndex = *sampleIterator;
588 Event *
event = std::get<0>(
fData)[sampleIndex];
591 buffer[bufferIndex] =
event->GetValue(j);
598 size_t sampleIndex = *sampleIterator;
599 Event *
event = std::get<0>(
fData)[sampleIndex];
604 buffer[bufferIndex] =
event->GetValue(j *
fBatchWidth + k);
612 Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
628 size_t sampleIndex = *sampleIterator++;
629 Event *
event = std::get<0>(
fData)[sampleIndex];
630 for (
size_t j = 0; j <
n; j++) {
634 if (event->GetNTargets() == 0) {
637 buffer[bufferIndex] = (info.
IsSignal(event)) ? 1.0 : 0.0;
640 buffer[bufferIndex] = 0.0;
641 if (j == event->GetClass()) {
642 buffer[bufferIndex] = 1.0;
646 buffer[bufferIndex] =
static_cast<Double_t>(
event->GetTarget(j));
658 size_t sampleIndex = *sampleIterator++;
659 Event *
event = std::get<0>(
fData)[sampleIndex];
660 buffer[i] =
event->GetWeight();
671 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
673 std::vector<Matrix_t> inputTensor(std::get<0>(DeviceBuffers), fBatchSize, )
674 size_t jump = fBatchHeight * fBatchWidth;
675 for (
size_t i = 0; i < fBatchSize; i++) {
676 DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
677 inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
679 Matrix_t outputMatrix(std::get<1>(DeviceBuffers), fBatchSize, fNOutputFeatures);
680 Matrix_t weightMatrix(std::get<2>(DeviceBuffers), fBatchSize, fNOutputFeatures);
688TTensorBatch<TCuda<double> > TTensorDataLoader<TensorInput, TCuda<double> >::GetTensorBatch()
692 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
694 std::vector<Matrix_t> inputTensor;
695 size_t jump = fBatchHeight * fBatchWidth;
696 for (
size_t i = 0; i < fBatchSize; i++) {
697 DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
698 inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
700 Matrix_t outputMatrix(std::get<1>(DeviceBuffers), fBatchSize, fNOutputFeatures);
701 Matrix_t weightMatrix(std::get<2>(DeviceBuffers), fBatchSize, fNOutputFeatures);
713 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
715 std::vector<Matrix_t> inputTensor;
716 size_t jump = fBatchHeight * fBatchWidth;
717 for (
size_t i = 0; i < fBatchSize; i++) {
718 DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
719 inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
721 Matrix_t outputMatrix(std::get<1>(DeviceBuffers), fBatchSize, fNOutputFeatures);
722 Matrix_t weightMatrix(std::get<2>(DeviceBuffers), fBatchSize, fNOutputFeatures);
734 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
736 std::vector<Matrix_t> inputTensor;
737 size_t jump = fBatchHeight * fBatchWidth;
738 for (
size_t i = 0; i < fBatchSize; i++) {
739 DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
740 inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
742 Matrix_t outputMatrix(std::get<1>(DeviceBuffers), fBatchSize, fNOutputFeatures);
743 Matrix_t weightMatrix(std::get<2>(DeviceBuffers), fBatchSize, fNOutputFeatures);
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Double 8 bytes.
float Float_t
Float 4 bytes (float).
Error("WriteTObject","The current directory (%s) is not associated with a file. The object (%s) has not been written.", GetName(), objname)
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
size_t fOffset
Offset for sub-buffers.
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
void CopyTo(const TCudaHostBuffer< AFloat > &) const
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
cudaStream_t fComputeStream
cudaStream for data transfer
TCudaDeviceBuffer(size_t size)
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
TCudaDeviceBuffer()=default
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
TCudaHostBuffer(size_t size)
size_t fOffset
Offset for sub-buffers.
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
TCudaHostBuffer()=default
cudaStream_t fComputeStream
cudaStream for data transfer
void SetConstVal(const AFloat constVal)
Sets the entire buffer to a constant value.
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
void CopyWeights(TMatrixT< AReal > &matrix, IndexIterator_t begin)
Copy weight matrix into the given host buffer.
void CopyOutput(TMatrixT< AReal > &matrix, IndexIterator_t begin)
Copy output matrix into the given host buffer.
void CopyInput(TMatrixT< AReal > &matrix, IndexIterator_t begin)
Copy input matrix into the given host buffer.
TMatrixT< AReal > outputMatrix
TMatrixT< AReal > inputMatrix
TDataLoader(const AData &data, size_t nSamples, size_t batchSize, size_t nInputFeatures, size_t nOutputFeatures, size_t nthreads=1)
TMatrixT< AReal > weightMatrix
TMatrixT< AReal > weightMatrix
The matrix used to keep the batch weights.
TTensorDataLoader(const AData &data, size_t nSamples, size_t batchDepth, size_t batchHeight, size_t batchWidth, size_t nOutputFeatures, std::vector< size_t > inputShape, size_t nStreams=1)
Constructor.
size_t fBatchHeight
The number of rows in each matrix.
const AData & fData
The data that should be loaded in the batches.
std::vector< TMatrixT< AReal > > inputTensor
The 3D tensor used to keep the input data.
void CopyTensorInput(std::vector< TMatrixT< AReal > > &tensor, IndexIterator_t sampleIterator)
Copy input tensor into the given host buffer.
TMatrixT< AReal > outputMatrix
The matrix used to keep the output.
size_t fBatchDepth
The number of matrices in the tensor.
size_t fBatchWidth
The number of columns in each matrix.
void CopyTensorWeights(TMatrixT< AReal > &matrix, IndexIterator_t sampleIterator)
Copy weight matrix into the given host buffer.
void CopyTensorOutput(TMatrixT< AReal > &matrix, IndexIterator_t sampleIterator)
Copy output matrix into the given host buffer.
Class that contains all the data information.
Bool_t IsSignal(const Event *ev) const
typename std::vector< size_t >::iterator IndexIterator_t
create variable transformations
void operator()(AFloat **devicePointer)
void operator()(AFloat **devicePointer)