19 #include "cuda_runtime.h" 28 template<
typename AFloat>
31 cudaFreeHost(*devicePointer);
32 delete[] devicePointer;
36 template<
typename AFloat>
40 AFloat ** pointer =
new AFloat * [1];
41 cudaMallocHost(pointer, size *
sizeof(AFloat));
46 template<
typename AFloat>
53 template<
typename AFloat>
65 template<
typename AFloat>
68 cudaFree(*devicePointer);
69 delete[] devicePointer;
73 template<
typename AFloat>
77 AFloat ** pointer =
new AFloat * [1];
78 cudaMalloc(pointer, size *
sizeof(AFloat));
84 template<
typename AFloat>
89 AFloat ** pointer =
new AFloat * [1];
90 cudaMalloc(pointer, size *
sizeof(AFloat));
95 template<
typename AFloat>
101 AFloat ** pointer =
new AFloat * [1];
102 *pointer = devicePointer;
107 template<
typename AFloat>
118 template<
typename AFloat>
125 template<
typename AFloat>
129 cudaMemcpyAsync(*
this, buffer,
fSize *
sizeof(AFloat),
134 template<
typename AFloat>
137 cudaMemcpyAsync(*
this, buffer,
fSize *
sizeof(AFloat),
152 for (
size_t i = 0; i < batchSize; i++) {
153 size_t sampleIndex = *sampleIterator;
154 for (
size_t j = 0; j <
n; j++) {
155 size_t bufferIndex = j * batchSize + i;
156 buffer[bufferIndex] =
static_cast<float>(inputMatrix(sampleIndex, j));
172 for (
size_t i = 0; i < batchSize; i++) {
173 size_t sampleIndex = *sampleIterator;
174 for (
size_t j = 0; j <
n; j++) {
175 size_t bufferIndex = j * batchSize + i;
176 buffer[bufferIndex] =
static_cast<float>(outputMatrix(sampleIndex, j));
189 Event *
event = fData.front();
190 size_t n =
event->GetNVariables();
194 for (
size_t i = 0; i < batchSize; i++) {
195 size_t sampleIndex = * sampleIterator++;
196 event = fData[sampleIndex];
197 for (
size_t j = 0; j <
n; j++) {
198 size_t bufferIndex = j * batchSize + i;
199 buffer[bufferIndex] =
static_cast<float>(
event->GetValue(j));
211 Event *
event = fData.front();
212 size_t n = (
event->GetNTargets() == 0) ? 1 : event->GetNTargets();
216 for (
size_t i = 0; i < batchSize; i++) {
217 size_t sampleIndex = * sampleIterator++;
218 event = fData[sampleIndex];
219 for (
size_t j = 0; j <
n; j++) {
221 size_t bufferIndex = j * batchSize + i;
222 if (event->GetNTargets() == 0) {
223 buffer[bufferIndex] = (
event->GetClass() == 0) ? 1.0 : 0.0;
225 buffer[bufferIndex] =
static_cast<float>(
event->GetTarget(j));
241 for (
size_t i = 0; i < batchSize; i++) {
242 size_t sampleIndex = *sampleIterator;
243 for (
size_t j = 0; j <
n; j++) {
244 size_t bufferIndex = j * batchSize + i;
245 buffer[bufferIndex] = inputMatrix(sampleIndex, j);
261 for (
size_t i = 0; i < batchSize; i++) {
262 size_t sampleIndex = *sampleIterator;
263 for (
size_t j = 0; j <
n; j++) {
264 size_t bufferIndex = j * batchSize + i;
265 buffer[bufferIndex] = outputMatrix(sampleIndex, j);
278 Event *
event = fData.front();
279 size_t n =
event->GetNVariables();
283 for (
size_t i = 0; i < batchSize; i++) {
284 size_t sampleIndex = * sampleIterator++;
285 event = fData[sampleIndex];
286 for (
size_t j = 0; j <
n; j++) {
287 size_t bufferIndex = j * batchSize + i;
288 buffer[bufferIndex] =
event->GetValue(j);
300 Event *
event = fData.front();
301 size_t n = (
event->GetNTargets() == 0) ? 1 : event->GetNTargets();
305 for (
size_t i = 0; i < batchSize; i++) {
306 size_t sampleIndex = * sampleIterator++;
307 event = fData[sampleIndex];
308 for (
size_t j = 0; j <
n; j++) {
310 size_t bufferIndex = j * batchSize + i;
311 if (event->GetNTargets() == 0) {
312 buffer[bufferIndex] = (
event->GetClass() == 0) ? 1.0 : 0.0;
314 buffer[bufferIndex] =
event->GetTarget(j);
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
TCudaDeviceBuffer()=default
typename std::vector< size_t >::iterator IndexIterator_t
void CopyTo(const TCudaHostBuffer< AFloat > &) const
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
size_t fOffset
Offset for sub-buffers.
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
cudaStream_t fComputeStream
cudaStream for data transfer
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
void operator()(AFloat **devicePointer)
void operator()(AFloat **devicePointer)
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
TCudaHostBuffer()=default
Abstract ClassifierFactory template that handles arbitrary types.
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
size_t fOffset
Offset for sub-buffers.
cudaStream_t fComputeStream
cudaStream for data transfer