Logo ROOT   6.16/01
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 21/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////////////
13// Implementation for the DataLoader for the multi-threaded //
14// CPU implementation of DNNs. //
15//////////////////////////////////////////////////////////////////
16
#include "TMVA/DNN/Architectures/Cpu/DataLoader.h"
#include "TMVA/Event.h"
#include "ROOT/TThreadExecutor.hxx"
#include <algorithm>
#include <iostream>
#include <random>
21
22namespace TMVA
23{
24namespace DNN
25{
26
27// TCpuBatchIterator
28//______________________________________________________________________________
29template<typename Data_t, typename Real_t>
30TCpuBatchIterator<Data_t, Real_t>::TCpuBatchIterator(
31 TCpuDataLoader<Data_t, Real_t> & dataLoader,
32 size_t batchIndex)
33 : fDataLoader(dataLoader), fBatchIndex(batchIndex)
34{
35 // Nothing to do here.
36}
37
38//______________________________________________________________________________
39template<typename Data_t, typename Real_t>
41{
42 return fDataLoader.GetBatch(fBatchIndex);
43}
44
45//______________________________________________________________________________
46template<typename Data_t, typename Real_t>
47TCpuBatchIterator<Data_t, Real_t> & TCpuBatchIterator<Data_t, Real_t>::operator++()
48{
49 fBatchIndex++;
50 return *this;
51}
52
53//______________________________________________________________________________
54template<typename Data_t, typename Real_t>
55bool TCpuBatchIterator<Data_t, Real_t>::operator!=(const TCpuBatchIterator & other)
56{
57 return fBatchIndex != other.GetBatchIndex();
58}
59
60//______________________________________________________________________________
61template<typename Data_t, typename Real_t>
62bool TCpuBatchIterator<Data_t, Real_t>::operator==(const TCpuBatchIterator & other)
63{
64 return fBatchIndex == other.GetBatchIndex();
65}
66
67// TCpuDataLoader
68//______________________________________________________________________________
69template<typename Data_t, typename Real_t>
70TCpuDataLoader<Data_t, Real_t>::TCpuDataLoader(const Data_t &input,
71 size_t nsamples,
72 size_t batchSize,
73 size_t ninputFeatures,
74 size_t noutputFeatures,
75 size_t bufferSize)
76 : fInput(input), fNSamples(nsamples), fBatchSize(batchSize),
77 fBufferSize(bufferSize), fNInputFeatures(ninputFeatures),
78 fNOutputFeatures(noutputFeatures), fNBatches(nsamples / batchSize),
79 fInputMatrices(), fOutputMatrices(), fSampleIndices()
80{
81 fInputMatrices.reserve(fBufferSize);
82 fOutputMatrices.reserve(fBufferSize);
83 for (size_t i = 0; i < fBufferSize; i++) {
84 fInputMatrices.emplace_back(fBatchSize, fNInputFeatures);
85 fOutputMatrices.emplace_back(fBatchSize, fNOutputFeatures);
86 }
87
88 fSampleIndices.reserve(fNBatches);
89 for (size_t i = 0; i < fNSamples; i++) {
90 fSampleIndices.emplace_back(i);
91 }
92}
93
94//______________________________________________________________________________
95template<typename Data_t, typename Real_t>
96inline void TCpuDataLoader<Data_t, Real_t>::CopyData(size_t batchIndex)
97{
98 auto copy = [this](UInt_t workerID)
99 {
100 CopyBatch(this->fInputMatrices[workerID % this->fBufferSize],
101 this->fOutputMatrices[workerID % this->fBufferSize],
102 this->fInput,
103 this->fSampleIndices.begin() + sampleIndex,
104 this->fSampleIndices.begin() + sampleIndex + this->fBatchSize);
105 sampleIndex += this->fBatchSize;
106 return 0;
107 };
108
109 size_t end = std::min(batchIndex + fBufferSize, fNBatches);
110 size_t start = batchIndex;
112 pool.Map(copy, ROOT::TSeqI(start, end));
113}
114
115//______________________________________________________________________________
116template<typename Data_t, typename Real_t>
117TCpuBatch<Real_t> TCpuDataLoader<Data_t, Real_t>::GetBatch(size_t batchIndex)
118{
119 size_t fBufferIndex = batchIndex % fBufferSize;
120 if (fBufferIndex == 0) {
121 CopyData(batchIndex);
122 }
123 return TCpuBatch<Real_t>(fInputMatrices[fBufferIndex],
124 fOutputMatrices[fBufferIndex]);
125}
126
127//______________________________________________________________________________
128template<typename Data_t, typename Real_t>
129auto TCpuDataLoader<Data_t, Real_t>::begin()
130 -> BatchIterator_t
131{
132 std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), std::default_random_engine{});
133 return BatchIterator_t(*this, 0);
134}
135
136//______________________________________________________________________________
137template<typename Data_t, typename Real_t>
138auto TCpuDataLoader<Data_t, Real_t>::end()
139 -> BatchIterator_t
140{
141 return BatchIterator_t(*this, fNBatches);
142}
143
144//______________________________________________________________________________
145template <>
146void TCpuDataLoader<MatrixInput_t, Double_t>::CopyBatch(
147 Matrix_t &inputMatrix,
148 Matrix_t &outputMatrix,
149 const MatrixInput_t &input,
150 IndexIterator_t indexBegin,
151 IndexIterator_t indexEnd)
152{
153 auto &in = std::get<0>(input);
154 auto &out = std::get<1>(input);
155
156 size_t batchIndex = 0;
157 for (IndexIterator_t i = indexBegin; i != indexEnd; i++) {
158 size_t index = *i;
159 for (size_t j = 0; j < (size_t) in.GetNcols(); j++) {
160 inputMatrix(batchIndex, j) = in(index, j);
161 }
162 for (size_t j = 0; j < (size_t) out.GetNcols(); j++) {
163 outputMatrix(batchIndex, j) = out(index, j);
164 }
165 batchIndex++;
166 }
167}
168
169//______________________________________________________________________________
170template <>
171void TCpuDataLoader<TMVAInput_t, Double_t>::CopyBatch(
172 Matrix_t &inputMatrix,
173 Matrix_t &outputMatrix,
174 const TMVAInput_t &input,
175 IndexIterator_t indexBegin,
176 IndexIterator_t indexEnd)
177{
178 size_t batchIndex = 0;
179 for (IndexIterator_t i = indexBegin; i != indexEnd; i++) {
180 size_t index = *i;
181 Event *event = input.at(index);
182 for (size_t j = 0; j < event->GetNVariables(); j++) {
183 inputMatrix(batchIndex, j) = event->GetValue(j);
184 }
185 if (event->GetNTargets() > 0) {
186 for (size_t j = 0; j < event->GetNTargets(); j++) {
187 outputMatrix(batchIndex, j) = event->GetTarget(j);
188 }
189 } else {
190 outputMatrix(batchIndex, 0) = (event->GetClass() == 0) ? 1.0 : 0.0;
191 batchIndex++;
192 }
193 }
194}
195
196// Explicit instantiation.
197//______________________________________________________________________________
198template class TCpuDataLoader<MatrixInput_t, Double_t>;
199template class TCpuDataLoader<TMVAInput_t, Double_t>;
200template class TCpuBatchIterator<MatrixInput_t, Double_t>;
201template class TCpuBatchIterator<TMVAInput_t, Double_t>;
202template class TCpuBatch<Double_t>;
203
204} // namespace DNN
205} // namespace TMVA
unsigned int UInt_t
Definition: RtypesCore.h:42
Bool_t operator!=(const TDatime &d1, const TDatime &d2)
Definition: TDatime.h:104
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition: TDatime.h:102
TTime operator*(const TTime &t1, const TTime &t2)
Definition: TTime.h:85
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
This class provides a simple interface to execute the same task multiple times in parallel,...
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
std::tuple< const TMatrixT< Double_t > &, const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > MatrixInput_t
Definition: DataLoader.h:38
Abstract ClassifierFactory template that handles arbitrary types.