Logo ROOT   6.10/09
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 21/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////
13 // Implementation for the DataLoader for the multi-threaded //
14 // CPU implementation of DNNs. //
15 //////////////////////////////////////////////////////////////////
16 
#include "TMVA/DNN/Architectures/Cpu/DataLoader.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <iostream>
#include <random>
20 
21 namespace TMVA
22 {
23 namespace DNN
24 {
25 
26 // TCpuBatchIterator
27 //______________________________________________________________________________
28 template<typename Data_t, typename Real_t>
29 TCpuBatchIterator<Data_t, Real_t>::TCpuBatchIterator(
30  TCpuDataLoader<Data_t, Real_t> & dataLoader,
31  size_t batchIndex)
32  : fDataLoader(dataLoader), fBatchIndex(batchIndex)
33 {
34  // Nothing to do here.
35 }
36 
37 //______________________________________________________________________________
38 template<typename Data_t, typename Real_t>
40 {
41  return fDataLoader.GetBatch(fBatchIndex);
42 }
43 
44 //______________________________________________________________________________
45 template<typename Data_t, typename Real_t>
46 TCpuBatchIterator<Data_t, Real_t> & TCpuBatchIterator<Data_t, Real_t>::operator++()
47 {
48  fBatchIndex++;
49  return *this;
50 }
51 
52 //______________________________________________________________________________
53 template<typename Data_t, typename Real_t>
54 bool TCpuBatchIterator<Data_t, Real_t>::operator!=(const TCpuBatchIterator & other)
55 {
56  return fBatchIndex != other.GetBatchIndex();
57 }
58 
59 //______________________________________________________________________________
60 template<typename Data_t, typename Real_t>
61 bool TCpuBatchIterator<Data_t, Real_t>::operator==(const TCpuBatchIterator & other)
62 {
63  return fBatchIndex == other.GetBatchIndex();
64 }
65 
66 // TCpuDataLoader
67 //______________________________________________________________________________
68 template<typename Data_t, typename Real_t>
69 TCpuDataLoader<Data_t, Real_t>::TCpuDataLoader(const Data_t &input,
70  size_t nsamples,
71  size_t batchSize,
72  size_t ninputFeatures,
73  size_t noutputFeatures,
74  size_t bufferSize)
75  : fInput(input), fNSamples(nsamples), fBatchSize(batchSize),
76  fBufferSize(bufferSize), fNInputFeatures(ninputFeatures),
77  fNOutputFeatures(noutputFeatures), fNBatches(nsamples / batchSize),
78  fInputMatrices(), fOutputMatrices(), fSampleIndices()
79 {
80  fInputMatrices.reserve(fBufferSize);
81  fOutputMatrices.reserve(fBufferSize);
82  for (size_t i = 0; i < fBufferSize; i++) {
83  fInputMatrices.emplace_back(fBatchSize, fNInputFeatures);
84  fOutputMatrices.emplace_back(fBatchSize, fNOutputFeatures);
85  }
86 
87  fSampleIndices.reserve(fNBatches);
88  for (size_t i = 0; i < fNSamples; i++) {
89  fSampleIndices.emplace_back(i);
90  }
91 }
92 
93 //______________________________________________________________________________
94 template<typename Data_t, typename Real_t>
95 inline void TCpuDataLoader<Data_t, Real_t>::CopyData(size_t batchIndex)
96 {
97  auto copy = [this](UInt_t workerID)
98  {
99  CopyBatch(this->fInputMatrices[workerID % this->fBufferSize],
100  this->fOutputMatrices[workerID % this->fBufferSize],
101  this->fInput,
102  this->fSampleIndices.begin() + sampleIndex,
103  this->fSampleIndices.begin() + sampleIndex + this->fBatchSize);
104  sampleIndex += this->fBatchSize;
105  return 0;
106  };
107 
108  size_t end = std::min(batchIndex + fBufferSize, fNBatches);
109  size_t start = batchIndex;
110  ROOT::TThreadExecutor pool{};
111  pool.Map(copy, ROOT::TSeqI(start, end));
112 }
113 
114 //______________________________________________________________________________
115 template<typename Data_t, typename Real_t>
116 TCpuBatch<Real_t> TCpuDataLoader<Data_t, Real_t>::GetBatch(size_t batchIndex)
117 {
118  size_t fBufferIndex = batchIndex % fBufferSize;
119  if (fBufferIndex == 0) {
120  CopyData(batchIndex);
121  }
122  return TCpuBatch<Real_t>(fInputMatrices[fBufferIndex],
123  fOutputMatrices[fBufferIndex]);
124 }
125 
126 //______________________________________________________________________________
127 template<typename Data_t, typename Real_t>
128 auto TCpuDataLoader<Data_t, Real_t>::begin()
129  -> BatchIterator_t
130 {
131  random_shuffle(fSampleIndices.begin(), fSampleIndices.end());
132  return BatchIterator_t(*this, 0);
133 }
134 
135 //______________________________________________________________________________
136 template<typename Data_t, typename Real_t>
137 auto TCpuDataLoader<Data_t, Real_t>::end()
138  -> BatchIterator_t
139 {
140  return BatchIterator_t(*this, fNBatches);
141 }
142 
143 //______________________________________________________________________________
144 template <>
145 void TCpuDataLoader<MatrixInput_t, Double_t>::CopyBatch(
146  Matrix_t &inputMatrix,
147  Matrix_t &outputMatrix,
148  const MatrixInput_t &input,
149  IndexIterator_t indexBegin,
150  IndexIterator_t indexEnd)
151 {
152  auto &in = std::get<0>(input);
153  auto &out = std::get<1>(input);
154 
155  size_t batchIndex = 0;
156  for (IndexIterator_t i = indexBegin; i != indexEnd; i++) {
157  size_t index = *i;
158  for (size_t j = 0; j < (size_t) in.GetNcols(); j++) {
159  inputMatrix(batchIndex, j) = in(index, j);
160  }
161  for (size_t j = 0; j < (size_t) out.GetNcols(); j++) {
162  outputMatrix(batchIndex, j) = out(index, j);
163  }
164  batchIndex++;
165  }
166 }
167 
168 //______________________________________________________________________________
169 template <>
170 void TCpuDataLoader<TMVAInput_t, Double_t>::CopyBatch(
171  Matrix_t &inputMatrix,
172  Matrix_t &outputMatrix,
173  const TMVAInput_t &input,
174  IndexIterator_t indexBegin,
175  IndexIterator_t indexEnd)
176 {
177  size_t batchIndex = 0;
178  for (IndexIterator_t i = indexBegin; i != indexEnd; i++) {
179  size_t index = *i;
180  Event *event = input.at(index);
181  for (size_t j = 0; j < event->GetNVariables(); j++) {
182  inputMatrix(batchIndex, j) = event->GetValue(j);
183  }
184  if (event->GetNTargets() > 0) {
185  for (size_t j = 0; j < event->GetNTargets(); j++) {
186  outputMatrix(batchIndex, j) = event->GetTarget(j);
187  }
188  } else {
189  outputMatrix(batchIndex, 0) = (event->GetClass() == 0) ? 1.0 : 0.0;
190  batchIndex++;
191  }
192  }
193 }
194 
195 // Explicit instantiation.
196 //______________________________________________________________________________
197 template class TCpuDataLoader<MatrixInput_t, Double_t>;
198 template class TCpuDataLoader<TMVAInput_t, Double_t>;
199 template class TCpuBatchIterator<MatrixInput_t, Double_t>;
200 template class TCpuBatchIterator<TMVAInput_t, Double_t>;
201 template class TCpuBatch<Double_t>;
202 
203 } // namespace DNN
204 } // namespace TMVA
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:38
Bool_t operator!=(const TDatime &d1, const TDatime &d2)
Definition: TDatime.h:104
This class provides a simple interface to execute the same task multiple times in parallel...
std::vector< Event * > TMVAInput_t
Definition: DataLoader.h:36
TTime operator*(const TTime &t1, const TTime &t2)
Definition: TTime.h:85
unsigned int UInt_t
Definition: RtypesCore.h:42
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition: TDatime.h:102
Abstract ClassifierFactory template that handles arbitrary types.
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
std::pair< const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > MatrixInput_t
Definition: DataLoader.h:35