Logo ROOT   6.14/05
Reference Guide
TensorDataLoader.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TTensorDataLoader *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Tensor Data Loader Class *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005-2015: *
17  * CERN, Switzerland *
18  * U. of Victoria, Canada *
19  * MPI-K Heidelberg, Germany *
20  * U. of Bonn, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef TMVA_DNN_TENSORDATALOADER
28 #define TMVA_DNN_TENSORDATALOADER
29 
30 #include "TMatrix.h"
31 #include "TMVA/Event.h"
32 #include <algorithm>
33 
34 namespace TMVA {
35  class DataSetInfo;
36 namespace DNN {
37 
38 //
39 // Input Data Types
40 //______________________________________________________________________________
41 using TensorInput =
42  std::tuple<const std::vector<TMatrixT<Double_t>> &, const TMatrixT<Double_t> &, const TMatrixT<Double_t> &>;
43 
44 using TMVAInput_t = std::tuple<const std::vector<Event *> &, const DataSetInfo &>;
45 using IndexIterator_t = typename std::vector<size_t>::iterator;
46 
/** TTensorBatch
 *
 * Class representing training batches consisting of a vector of matrices as input data,
 * a matrix of output data and a matrix of event weights. The data can be accessed using
 * the GetInput(), GetOutput() and GetWeights() member functions.
 *
 * \tparam Architecture_t The underlying architecture.
 */
template <typename Architecture_t>
class TTensorBatch {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   std::vector<Matrix_t> fInputTensor; ///< The input tensor batch, one matrix one input.
   Matrix_t fOutputMatrix;             ///< The output matrix representing the ground truth.
   Matrix_t fWeightMatrix;             ///< The matrix holding the event weights.

public:
   /** Build a batch from an input tensor, an output matrix and a weight matrix
    *  (in this order). */
   TTensorBatch(std::vector<Matrix_t> &, Matrix_t &, Matrix_t &);
   TTensorBatch(const TTensorBatch &) = default;
   TTensorBatch(TTensorBatch &&) = default;
   TTensorBatch &operator=(const TTensorBatch &) = default;
   TTensorBatch &operator=(TTensorBatch &&) = default;

   /** Return the tensor representing the input data */
   std::vector<Matrix_t> &GetInput() { return fInputTensor; }
   /** Return the matrix representing the output data. */
   Matrix_t &GetOutput() { return fOutputMatrix; }
   /** Return the matrix holding the event weights. */
   Matrix_t &GetWeights() { return fWeightMatrix; }
};
80 
// Forward declaration of the data loader class template, so that it can be
// named before its full definition appears.
template <typename Data_t, typename Architecture_t>
class TTensorDataLoader;
83 
84 /** TTensorBatchIterator
85  *
86  * Simple iterator class for the iterations over the training batches in
87  * a given data set represented by a TTensorDataLoader object.
88  *
89  * \tparam Data_t The input data type.
90  * \tparam Architecture_t The underlying architecture type.
91  */
92 template <typename Data_t, typename Architecture_t>
94 private:
96  size_t fBatchIndex;
97 
98 public:
100  : fTensorDataLoader(tensorDataLoader), fBatchIndex(index)
101  {
102  // Nothing to do here.
103  }
104 
105  TTensorBatch<Architecture_t> operator*() { return fTensorDataLoader.GetTensorBatch(); }
107  {
108  fBatchIndex++;
109  return *this;
110  }
111  bool operator!=(const TTensorBatchIterator &other) { return fBatchIndex != other.fBatchIndex; }
112 };
113 
114 /** TTensorDataLoader
115  *
116  * Service class managing the streaming of the training data from the input data
117  * type to the accelerator device or the CPU. A TTensorDataLoader object manages
118  * a number of host and device buffer pairs that are used in a round-robin manner
119  * for the transfer of batches to the device.
120  *
121  * Each TTensorDataLoader object has an associated batch size and a number of total
122  * samples in the dataset. One epoch is the number of buffers required to transfer
123  * the complete training set. Using the begin() and end() member functions allows
124  * the user to iterate over the batches in one epoch.
125  *
126  * \tparam Data_t The input data type.
127  * \tparam Architecture_t The achitecture class of the underlying architecture.
128  */
129 template <typename Data_t, typename Architecture_t>
130 class TTensorDataLoader {
131 private:
132  using HostBuffer_t = typename Architecture_t::HostBuffer_t;
133  using DeviceBuffer_t = typename Architecture_t::DeviceBuffer_t;
134  using Matrix_t = typename Architecture_t::Matrix_t;
136 
137  const Data_t &fData; ///< The data that should be loaded in the batches.
138 
139  size_t fNSamples; ///< The total number of samples in the dataset.
140  size_t fBatchSize; ///< The size of a batch.
141  size_t fBatchDepth; ///< The number of matrices in the tensor.
142  size_t fBatchHeight; ///< The number od rows in each matrix.
143  size_t fBatchWidth; ///< The number of columns in each matrix.
144  size_t fNOutputFeatures; ///< The number of outputs from the classifier/regressor.
145  size_t fBatchIndex; ///< The index of the batch when there are multiple batches in parallel
146 
147  size_t fNStreams; ///< Number of buffer pairs.
148  std::vector<DeviceBuffer_t> fDeviceBuffers; ///< The device buffers used to keep the input, output and weight data.
149  std::vector<HostBuffer_t> fHostBuffers; ///< The host buffers used to load the input, output and weight data.
150 
151  std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.
152 
153 public:
154  /*! Constructor. */
155  TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, size_t batchDepth, size_t batchHeight,
156  size_t batchWidth, size_t nOutputFeatures, size_t nStreams = 1);
157 
158  TTensorDataLoader(const TTensorDataLoader &) = default;
159  TTensorDataLoader(TTensorDataLoader &&) = default;
160  TTensorDataLoader &operator=(const TTensorDataLoader &) = default;
162 
163  /** Copy input tensor into the given host buffer. Function to be specialized by
164  * the architecture-specific backend. */
165  void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin);
166  /** Copy output matrix into the given host buffer. Function to be specialized
167  * by the architecture-spcific backend. */
168  void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin);
169  /** Copy weight matrix into the given host buffer. Function to be specialized
170  * by the architecture-spcific backend. */
171  void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin);
172 
174  BatchIterator_t end() { return TTensorBatchIterator<Data_t, Architecture_t>(*this, fNSamples / fBatchSize); }
175 
176  /** Shuffle the order of the samples in the batch. The shuffling is indirect,
177  * i.e. only the indices are shuffled. No input data is moved by this
178  * routine. */
179  template<typename RNG>
180  void Shuffle(RNG & rng);
181 
182  /** Return the next batch from the training set. The TTensorDataLoader object
183  * keeps an internal counter that cycles over the batches in the training
184  * set. */
185  TTensorBatch<Architecture_t> GetTensorBatch();
186 };
187 
188 //
189 // TTensorBatch Class.
190 //______________________________________________________________________________
191 template <typename Architecture_t>
192 TTensorBatch<Architecture_t>::TTensorBatch(std::vector<Matrix_t> &inputTensor, Matrix_t &outputMatrix,
193  Matrix_t &weightMatrix)
194  : fInputTensor(inputTensor), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
195 {
196  // Nothing to do here.
197 }
198 
199 //
200 // TTensorDataLoader Class.
201 //______________________________________________________________________________
202 template <typename Data_t, typename Architecture_t>
203 TTensorDataLoader<Data_t, Architecture_t>::TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize,
204  size_t batchDepth, size_t batchHeight, size_t batchWidth,
205  size_t nOutputFeatures, size_t nStreams)
206  : fData(data), fNSamples(nSamples), fBatchSize(batchSize), fBatchDepth(batchDepth), fBatchHeight(batchHeight),
207  fBatchWidth(batchWidth), fNOutputFeatures(nOutputFeatures), fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(),
208  fHostBuffers(), fSampleIndices()
209 {
210  size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
211  size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
212  size_t weightMatrixSize = fBatchSize;
213 
214  for (size_t i = 0; i < fNStreams; i++) {
215  fHostBuffers.push_back(HostBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
216  fDeviceBuffers.push_back(DeviceBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
217  }
218 
219  fSampleIndices.reserve(fNSamples);
220  for (size_t i = 0; i < fNSamples; i++) {
221  fSampleIndices.push_back(i);
222  }
223 }
224 
225 //______________________________________________________________________________
226 template <typename Data_t, typename Architecture_t>
228 {
229  fBatchIndex %= (fNSamples / fBatchSize); // Cycle through samples.
230 
231  size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
232  size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
233  size_t weightMatrixSize = fBatchSize;
234 
235  size_t streamIndex = fBatchIndex % fNStreams;
236  HostBuffer_t &hostBuffer = fHostBuffers[streamIndex];
237  DeviceBuffer_t &deviceBuffer = fDeviceBuffers[streamIndex];
238 
239  HostBuffer_t inputHostBuffer = hostBuffer.GetSubBuffer(0, inputTensorSize);
240  HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
241  HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
242 
243  DeviceBuffer_t inputDeviceBuffer = deviceBuffer.GetSubBuffer(0, inputTensorSize);
244  DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
245  DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
246 
247  // here sample index has batch size as offset , while in
248  // copy tensor input has batch depth.
249  // We support then now two cases: batchdepth = 1 batchHeight = batch size
250  // or batch depth = batch size
251  size_t sampleIndex = fBatchIndex * fBatchSize;
252  IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;
253 
254  CopyTensorInput(inputHostBuffer, sampleIndexIterator);
255  CopyTensorOutput(outputHostBuffer, sampleIndexIterator);
256  CopyTensorWeights(weightHostBuffer, sampleIndexIterator);
257 
258  deviceBuffer.CopyFrom(hostBuffer);
259 
260  std::vector<Matrix_t> inputTensor;
261  size_t jump = fBatchHeight * fBatchWidth;
262  for (size_t i = 0; i < fBatchDepth; i++) {
263  DeviceBuffer_t subInputDeviceBuffer = inputDeviceBuffer.GetSubBuffer(i * jump, jump);
264  inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
265  }
266  Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
267  Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, fNOutputFeatures);
268 
269  fBatchIndex++;
270  return TTensorBatch<Architecture_t>(inputTensor, outputMatrix, weightMatrix);
271 }
272 
273 //______________________________________________________________________________
274 template <typename Data_t, typename Architecture_t>
275 template <typename RNG>
277 {
278  std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), rng);
279 }
280 
281 } // namespace DNN
282 } // namespace TMVA
283 
284 #endif
TTensorDataLoader< Data_t, Architecture_t > & fTensorDataLoader
std::vector< Matrix_t > & GetInput()
Return the tensor representing the input data.
TTensorBatch(std::vector< Matrix_t > &, Matrix_t &, Matrix_t &)
std::vector< DeviceBuffer_t > fDeviceBuffers
The device buffers used to keep the input, output and weight data.
bool operator!=(const TTensorBatchIterator &other)
void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy input tensor into the given host buffer.
typename Architecture_t::Matrix_t Matrix_t
TTensorBatch< Architecture_t > GetTensorBatch()
Return the next batch from the training set.
std::vector< Matrix_t > fInputTensor
The input tensor batch, one matrix one input.
size_t fBatchHeight
The number of rows in each matrix.
std::vector< HostBuffer_t > fHostBuffers
The host buffers used to load the input, output and weight data.
Class that contains all the data information.
Definition: DataSetInfo.h:60
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42
TTensorBatchIterator(TTensorDataLoader< Data_t, Architecture_t > &tensorDataLoader, size_t index=0)
Matrix_t & GetWeights()
Return the matrix holding the event weights.
size_t fBatchIndex
The index of the batch when there are multiple batches in parallel.
TTensorBatchIterator operator++()
TTensorBatch & operator=(const TTensorBatch &)=default
void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin)
Copy weight matrix into the given host buffer.
std::tuple< const std::vector< TMatrixT< Double_t > > &, const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > TensorInput
typename Architecture_t::Matrix_t Matrix_t
TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, size_t batchDepth, size_t batchHeight, size_t batchWidth, size_t nOutputFeatures, size_t nStreams=1)
Constructor.
void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy output matrix into the given host buffer.
typename Architecture_t::DeviceBuffer_t DeviceBuffer_t
Matrix_t & GetOutput()
Return the matrix representing the output data.
size_t fBatchDepth
The number of matrices in the tensor.
void Shuffle(RNG &rng)
Shuffle the order of the samples in the batch.
typename Architecture_t::HostBuffer_t HostBuffer_t
Abstract ClassifierFactory template that handles arbitrary types.
size_t fNSamples
The total number of samples in the dataset.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
Matrix_t fOutputMatrix
The output matrix representing the ground truth.
size_t fBatchWidth
The number of columns in each matrix.
size_t fBatchSize
The size of a batch.
const Data_t & fData
The data that should be loaded in the batches.
size_t fNOutputFeatures
The number of outputs from the classifier/regressor.
size_t fNStreams
Number of buffer pairs.
TTensorBatch< Architecture_t > operator*()
std::vector< size_t > fSampleIndices
Ordering of the samples in the epoch.