20 #include "cuda_runtime.h" 29 template<
typename AFloat>
32 cudaFreeHost(*devicePointer);
33 delete[] devicePointer;
37 template<
typename AFloat>
39 : fOffset(0), fSize(size), fComputeStream(0), fDestructor()
41 AFloat ** pointer =
new AFloat * [1];
42 cudaMallocHost(pointer, size *
sizeof(AFloat));
47 template<
typename AFloat>
54 template<
typename AFloat>
67 template<
typename AFloat>
70 cudaFree(*devicePointer);
71 delete[] devicePointer;
75 template<
typename AFloat>
79 AFloat ** pointer =
new AFloat * [1];
80 cudaMalloc(pointer, size *
sizeof(AFloat));
86 template<
typename AFloat>
91 AFloat ** pointer =
new AFloat * [1];
92 cudaMalloc(pointer, size *
sizeof(AFloat));
97 template<
typename AFloat>
103 AFloat ** pointer =
new AFloat * [1];
104 *pointer = devicePointer;
109 template<
typename AFloat>
120 template<
typename AFloat>
127 template<
typename AFloat>
131 cudaMemcpyAsync(*
this, buffer,
fSize *
sizeof(AFloat),
136 template<
typename AFloat>
139 cudaMemcpyAsync(*
this, buffer,
fSize *
sizeof(AFloat),
154 for (
size_t i = 0; i < batchSize; i++) {
155 size_t sampleIndex = *sampleIterator;
156 for (
size_t j = 0; j <
n; j++) {
157 size_t bufferIndex = j * batchSize + i;
158 buffer[bufferIndex] =
static_cast<float>(inputMatrix(sampleIndex, j));
174 for (
size_t i = 0; i < batchSize; i++) {
175 size_t sampleIndex = *sampleIterator;
176 for (
size_t j = 0; j <
n; j++) {
177 size_t bufferIndex = j * batchSize + i;
178 buffer[bufferIndex] =
static_cast<float>(outputMatrix(sampleIndex, j));
190 for (
size_t i = 0; i < batchSize; i++) {
191 buffer[i] =
static_cast<float>(weightMatrix(*sampleIterator, 0));
203 Event *
event = std::get<0>(fData)[0];
204 size_t n =
event->GetNVariables();
205 for (
size_t i = 0; i < batchSize; i++) {
206 size_t sampleIndex = * sampleIterator++;
207 event = std::get<0>(fData)[sampleIndex];
208 for (
size_t j = 0; j <
n; j++) {
209 size_t bufferIndex = j * batchSize + i;
210 buffer[bufferIndex] =
static_cast<float>(
event->GetValue(j));
223 size_t n = buffer.
GetSize() / batchSize;
227 for (
size_t i = 0; i < batchSize; i++) {
228 size_t sampleIndex = *sampleIterator++;
229 Event *
event = std::get<0>(fData)[sampleIndex];
230 for (
size_t j = 0; j <
n; j++) {
232 size_t bufferIndex = j * batchSize + i;
234 if (event->GetNTargets() == 0) {
237 buffer[bufferIndex] = (info.
IsSignal(event)) ? 1.0 : 0.0;
240 buffer[bufferIndex] = 0.0;
241 if (j == event->GetClass()) {
242 buffer[bufferIndex] = 1.0;
246 buffer[bufferIndex] =
static_cast<float>(
event->GetTarget(j));
257 for (
size_t i = 0; i < batchSize; i++) {
258 size_t sampleIndex = *sampleIterator++;
259 Event *
event = std::get<0>(fData)[sampleIndex];
260 buffer[i] =
static_cast<float>(
event->GetWeight());
272 for (
size_t i = 0; i < batchSize; i++) {
273 size_t sampleIndex = *sampleIterator;
274 for (
size_t j = 0; j <
n; j++) {
275 size_t bufferIndex = j * batchSize + i;
276 buffer[bufferIndex] = inputMatrix(sampleIndex, j);
292 for (
size_t i = 0; i < batchSize; i++) {
293 size_t sampleIndex = *sampleIterator;
294 for (
size_t j = 0; j <
n; j++) {
295 size_t bufferIndex = j * batchSize + i;
296 buffer[bufferIndex] = outputMatrix(sampleIndex, j);
308 for (
size_t i = 0; i < batchSize; i++) {
309 buffer[i] =
static_cast<double>(weightMatrix(*sampleIterator, 0));
319 Event *
event = std::get<0>(fData)[0];
320 size_t n =
event->GetNVariables();
321 for (
size_t i = 0; i < batchSize; i++) {
322 size_t sampleIndex = * sampleIterator++;
323 event = std::get<0>(fData)[sampleIndex];
324 for (
size_t j = 0; j <
n; j++) {
325 size_t bufferIndex = j * batchSize + i;
326 buffer[bufferIndex] =
event->GetValue(j);
339 size_t n = buffer.
GetSize() / batchSize;
343 for (
size_t i = 0; i < batchSize; i++) {
344 size_t sampleIndex = *sampleIterator++;
345 Event *
event = std::get<0>(fData)[sampleIndex];
346 for (
size_t j = 0; j <
n; j++) {
348 size_t bufferIndex = j * batchSize + i;
350 if (event->GetNTargets() == 0) {
353 buffer[bufferIndex] = (info.
IsSignal(event)) ? 1.0 : 0.0;
356 buffer[bufferIndex] = 0.0;
357 if (j == event->GetClass()) {
358 buffer[bufferIndex] = 1.0;
362 buffer[bufferIndex] =
event->GetTarget(j);
373 for (
size_t i = 0; i < batchSize; i++) {
374 size_t sampleIndex = *sampleIterator++;
375 Event *
event = std::get<0>(fData)[sampleIndex];
376 buffer[i] =
static_cast<double>(
event->GetWeight());
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
TCudaDeviceBuffer()=default
typename std::vector< size_t >::iterator IndexIterator_t
size_t fOffset
Offset for sub-buffers.
void CopyTo(const TCudaHostBuffer< AFloat > &) const
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
cudaStream_t fComputeStream
cudaStream for data transfer
Class that contains all the data information.
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
void operator()(AFloat **devicePointer)
void operator()(AFloat **devicePointer)
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
TCudaHostBuffer()=default
Abstract ClassifierFactory template that handles arbitrary types.
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
Bool_t IsSignal(const Event *ev) const
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
size_t fOffset
Offset for sub-buffers.
cudaStream_t fComputeStream
cudaStream for data transfer