Logo ROOT  
Reference Guide
TensorDataLoader.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TTensorDataLoader *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Tensor Data Loader Class *
12 * *
13 * Authors (alphabetical): *
14 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005-2015: *
17 * CERN, Switzerland *
18 * U. of Victoria, Canada *
19 * MPI-K Heidelberg, Germany *
20 * U. of Bonn, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef TMVA_DNN_TENSORDATALOADER
28#define TMVA_DNN_TENSORDATALOADER
29
30#include "TMatrix.h"
31#include "TMVA/Event.h"
32#include <algorithm>
33#include <vector>
34#include <utility>
35
36namespace TMVA {
37 class DataSetInfo;
38namespace DNN {
39
40//
41// Input Data Types
42//______________________________________________________________________________
44 std::tuple<const std::vector<TMatrixT<Double_t>> &, const TMatrixT<Double_t> &, const TMatrixT<Double_t> &>;
45
/// Input data given as a tuple of two const references: a vector of Event pointers and a DataSetInfo.
46using TMVAInput_t = std::tuple<const std::vector<Event *> &, const DataSetInfo &>;
/// Iterator into a std::vector<size_t>; the data loader uses it to walk its vector of sample indices.
47using IndexIterator_t = typename std::vector<size_t>::iterator;
48
49/** TTensorBatch
50 *
51 * Class representing training batches consisting of a vector of matrices as input data
52 * and a matrix of output data. The input and output data can be accessed using
53 * the GetInput() and GetOutput() member functions.
54 *
55 * \tparam Architecture_t The underlying architecture.
56 */
57
58template <typename Architecture_t>
60public:
61 using Matrix_t = typename Architecture_t::Matrix_t;
62 using Tensor_t = typename Architecture_t::Tensor_t;
63
64private:
65 Tensor_t fInputTensor; ///< The input tensor batch, one matrix one input.
66 Matrix_t fOutputMatrix; ///< The output matrix representing the ground truth.
67 Matrix_t fWeightMatrix; ///< The event/example weights
68
69public:
71 TTensorBatch(const TTensorBatch &) = default;
73 TTensorBatch &operator=(const TTensorBatch &) = default;
75
76 /** Return the tensor representing the input data */
78 /** Return the matrix representing the output data. */
80 /** Return the matrix holding the event weights. */
82};
83
84template <typename Data_t, typename Architecture_t>
86
87/** TTensorBatchIterator
88 *
89 * Simple iterator class for the iterations over the training batches in
90 * a given data set represented by a TTensorDataLoader object.
91 *
92 * \tparam Data_t The input data type.
93 * \tparam Architecture_t The underlying architecture type.
94 */
95template <typename Data_t, typename Architecture_t>
97private:
100
101public:
103 : fTensorDataLoader(tensorDataLoader), fBatchIndex(index)
104 {
105 // Nothing to do here.
106 }
107
110 {
111 fBatchIndex++;
112 return *this;
113 }
114 bool operator!=(const TTensorBatchIterator &other) { return fBatchIndex != other.fBatchIndex; }
115};
116
117/** TTensorDataLoader
118 *
119 * Service class managing the streaming of the training data from the input data
120 * type to the accelerator device or the CPU. A TTensorDataLoader object manages
121 * a number of host and device buffer pairs that are used in a round-robin manner
122 * for the transfer of batches to the device.
123 *
124 * Each TTensorDataLoader object has an associated batch size and a number of total
125 * samples in the dataset. One epoch is the number of buffers required to transfer
126 * the complete training set. Using the begin() and end() member functions allows
127 * the user to iterate over the batches in one epoch.
128 *
129 * \tparam Data_t The input data type.
130 * \tparam Architecture_t The architecture class of the underlying architecture.
131 */
132template <typename Data_t, typename Architecture_t>
134private:
135 using HostBuffer_t = typename Architecture_t::HostBuffer_t;
136 using DeviceBuffer_t = typename Architecture_t::DeviceBuffer_t;
137 using Matrix_t = typename Architecture_t::Matrix_t;
138 using Tensor_t = typename Architecture_t::Tensor_t;
139 using Shape_t = typename Architecture_t::Tensor_t::Shape_t;
141
142 const Data_t &fData; ///< The data that should be loaded in the batches (held by reference, not copied).
143 size_t fNSamples; ///< The total number of samples in the dataset.
144 size_t fBatchSize; ///< The size of a batch.
145 Shape_t fInputLayout; ///< The input data layout (does not include batch size).
146 size_t fBatchDepth; ///< The number of matrices in the tensor.
147 size_t fBatchHeight; ///< The number of rows in each matrix.
148 size_t fBatchWidth; ///< The number of columns in each matrix.
149 size_t fNOutputFeatures; ///< The number of outputs from the classifier/regressor.
150 size_t fBatchIndex; ///< Index of the next batch to load; wrapped modulo (fNSamples / fBatchSize) and incremented on every batch request.
151
152
153 size_t fNStreams; ///< Number of host/device buffer pairs.
154 std::vector<DeviceBuffer_t> fDeviceBuffers; ///< The device buffers used to keep the input, output and weight data.
155 std::vector<HostBuffer_t> fHostBuffers; ///< The host buffers used to load the input, output and weight data.
156
157 std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.
158
159public:
160 /*! Constructor. */
161 TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, const Shape_t & inputLayout,
162 const Shape_t & batchLayout, size_t nOutputFeatures, size_t nStreams = 1);
163
168
169 /** Copy input tensor into the given host buffer. Function to be specialized by
170 * the architecture-specific backend. */
172 /** Copy output matrix into the given host buffer. Function to be specialized
173 * by the architecture-specific backend. */
175 /** Copy weight matrix into the given host buffer. Function to be specialized
176 * by the architecture-specific backend. */
178
181
182 /** Shuffle the order of the samples in the batch. The shuffling is indirect,
183 * i.e. only the indices are shuffled. No input data is moved by this
184 * routine. */
185 template<typename RNG>
186 void Shuffle(RNG & rng);
187
188 /** Return the next batch from the training set. The TTensorDataLoader object
189 * keeps an internal counter that cycles over the batches in the training
190 * set. */
192};
193
194//
195// TTensorBatch Class.
196//______________________________________________________________________________
/** Build a batch from an input tensor, an output matrix and a weight matrix.
 *  Each member is initialised directly from the corresponding argument; whether that
 *  copies or shares the underlying storage depends on Architecture_t's tensor and
 *  matrix types, which are not visible in this header. */
197template <typename Architecture_t>
199 Matrix_t &weightMatrix)
200 : fInputTensor(inputTensor), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
201{
202 // Nothing to do here.
203}
204
205//
206// TTensorDataLoader Class.
207//______________________________________________________________________________
208template <typename Data_t, typename Architecture_t>
209TTensorDataLoader<Data_t, Architecture_t>::TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize,
210 const Shape_t & inputLayout, const Shape_t & batchLayout,
211 size_t nOutputFeatures, size_t nStreams)
212 : fData(data), fNSamples(nSamples), fBatchSize(batchSize), fInputLayout(inputLayout), fBatchDepth(batchLayout[0]), fBatchHeight(batchLayout[1]),
213 fBatchWidth(batchLayout[2]), fNOutputFeatures(nOutputFeatures), fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(),
214 fHostBuffers(), fSampleIndices()
215{
216 size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
217 size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
218 size_t weightMatrixSize = fBatchSize;
219
220 for (size_t i = 0; i < fNStreams; i++) {
221 fHostBuffers.push_back(HostBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
222 fDeviceBuffers.push_back(DeviceBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
223 }
224
225 fSampleIndices.reserve(fNSamples);
226 for (size_t i = 0; i < fNSamples; i++) {
227 fSampleIndices.push_back(i);
228 }
229}
230
231//______________________________________________________________________________
/** Assemble the next batch and return it as a TTensorBatch.
 *
 * Steps performed by the body:
 *  - wrap fBatchIndex to the number of full batches (fNSamples / fBatchSize), so samples
 *    beyond the last full batch are never reached;
 *  - select the host/device buffer pair number fBatchIndex % fNStreams and split each
 *    buffer into input, output and weight sub-buffers (in that order);
 *  - fill the host sub-buffers through CopyTensorInput/Output/Weights, starting at
 *    position fBatchIndex * fBatchSize of fSampleIndices, then copy host to device;
 *  - wrap the device sub-buffers in a tensor and two matrices and advance fBatchIndex.
 *
 * NOTE(review): (fNSamples / fBatchSize) is zero when fNSamples < fBatchSize, and
 * fNStreams may be zero; either makes a modulus below divide by zero. These look like
 * unchecked preconditions - confirm that callers guarantee them.
 * NOTE(review): assert() is used below but this header does not include <cassert>
 * itself - presumably it arrives transitively; confirm.
 */
232template <typename Data_t, typename Architecture_t>
234{
235 fBatchIndex %= (fNSamples / fBatchSize); // Cycle through the full batches of the data set.
236
   // Element counts of the three regions packed into one buffer.
237 size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
238 size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
239 size_t weightMatrixSize = fBatchSize;
240
   // Buffer pairs are selected by batch index modulo the number of streams.
241 size_t streamIndex = fBatchIndex % fNStreams;
242 HostBuffer_t &hostBuffer = fHostBuffers[streamIndex];
243 DeviceBuffer_t &deviceBuffer = fDeviceBuffers[streamIndex];
244
   // Buffer layout: [ input tensor | output matrix | weights ].
245 HostBuffer_t inputHostBuffer = hostBuffer.GetSubBuffer(0, inputTensorSize);
246 HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
247 HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
248
249 DeviceBuffer_t inputDeviceBuffer = deviceBuffer.GetSubBuffer(0, inputTensorSize);
250 DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
251 DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
252
253 // The sample offset computed here is in units of the batch size, whereas
254 // the tensor-input copy routine works in units of the batch depth.
255 // Two cases are therefore supported: batch depth == 1 with batch height == batch size,
256 // or batch depth == batch size.
257 size_t sampleIndex = fBatchIndex * fBatchSize;
258 IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;
259
260 CopyTensorInput(inputHostBuffer, sampleIndexIterator);
261 CopyTensorOutput(outputHostBuffer, sampleIndexIterator);
262 CopyTensorWeights(weightHostBuffer, sampleIndexIterator);
263
264 deviceBuffer.CopyFrom(hostBuffer); // One call for the whole packed buffer (input + output + weights).
265
266 assert(fInputLayout.size() == 3); // fInputLayout[0..2] are read just below.
267 Tensor_t inputTensor = Architecture_t::CreateTensor( inputDeviceBuffer, fBatchSize, fInputLayout[0], fInputLayout[1], fInputLayout[2] );
268 // Dense-layer case: replace the tensor by a 2D column-major one of shape {fBatchSize, fInputLayout.back()}.
269 if (fBatchDepth == 1 && fBatchHeight == fBatchSize && fInputLayout[0] == 1 && fInputLayout[1] == 1){
270 inputTensor = Tensor_t( inputDeviceBuffer, {fBatchSize, fInputLayout.back() }, Tensor_t::MemoryLayout::ColumnMajor );
271 }
272
273 Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
274 Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, 1);
275
276 fBatchIndex++; // The next call produces the following batch.
277
278
279 return TTensorBatch<Architecture_t>(inputTensor, outputMatrix, weightMatrix);
280}
281
282//______________________________________________________________________________
/** Randomly permute fSampleIndices with std::shuffle, using \p rng as the source of
 *  randomness. Only the index vector is reordered by this function. */
283template <typename Data_t, typename Architecture_t>
284template <typename RNG>
286{
287 std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), rng);
288}
289
290} // namespace DNN
291} // namespace TMVA
292
293#endif
TTensorBatchIterator(TTensorDataLoader< Data_t, Architecture_t > &tensorDataLoader, size_t index=0)
TTensorBatch< Architecture_t > operator*()
bool operator!=(const TTensorBatchIterator &other)
TTensorDataLoader< Data_t, Architecture_t > & fTensorDataLoader
TTensorBatchIterator operator++()
TTensorBatch & operator=(const TTensorBatch &)=default
Tensor_t fInputTensor
The input tensor batch, one matrix one input.
TTensorBatch(TTensorBatch &&)=default
TTensorBatch(Tensor_t &, Matrix_t &, Matrix_t &)
Matrix_t fWeightMatrix
The event/example weights.
TTensorBatch & operator=(TTensorBatch &&)=default
typename Architecture_t::Tensor_t Tensor_t
Matrix_t & GetWeights()
Return the matrix holding the event weights.
Matrix_t & GetOutput()
Return the matrix representing the output data.
TTensorBatch(const TTensorBatch &)=default
Matrix_t fOutputMatrix
The output matrix representing the ground truth.
Tensor_t & GetInput()
Return the tensor representing the input data.
typename Architecture_t::Matrix_t Matrix_t
typename Architecture_t::Tensor_t::Shape_t Shape_t
TTensorDataLoader(const TTensorDataLoader &)=default
void Shuffle(RNG &rng)
Shuffle the order of the samples in the batch.
size_t fNOutputFeatures
The number of outputs from the classifier/regressor.
std::vector< size_t > fSampleIndices
Ordering of the samples in the epoch.
std::vector< DeviceBuffer_t > fDeviceBuffers
The device buffers used to keep the input, output and weight data.
TTensorBatch< Architecture_t > GetTensorBatch()
Return the next batch from the training set.
TTensorDataLoader & operator=(const TTensorDataLoader &)=default
typename Architecture_t::DeviceBuffer_t DeviceBuffer_t
size_t fBatchWidth
The number of columns in each matrix.
void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy output matrix into the given host buffer.
size_t fBatchIndex
The index of the batch when there are multiple batches in parallel.
size_t fBatchHeight
The number of rows in each matrix.
std::vector< HostBuffer_t > fHostBuffers
The host buffers used to load the input, output and weight data.
TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, const Shape_t &inputLayout, const Shape_t &batchLayout, size_t nOutputFeatures, size_t nStreams=1)
Constructor.
TTensorDataLoader(TTensorDataLoader &&)=default
size_t fBatchSize
The size of a batch.
void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin)
Copy weight matrix into the given host buffer.
typename Architecture_t::Matrix_t Matrix_t
typename Architecture_t::HostBuffer_t HostBuffer_t
size_t fBatchDepth
The number of matrices in the tensor.
size_t fNStreams
Number of buffer pairs.
const Data_t & fData
The data that should be loaded in the batches.
TTensorDataLoader & operator=(TTensorDataLoader &&)=default
typename Architecture_t::Tensor_t Tensor_t
size_t fNSamples
The total number of samples in the dataset.
void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy input tensor into the given host buffer.
Class that contains all the data information.
Definition: DataSetInfo.h:62
std::tuple< const std::vector< TMatrixT< Double_t > > &, const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > TensorInput
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40
create variable transformations