Logo ROOT   6.14/05
Reference Guide
TensorDataLoader.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TTensorDataLoader *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Specialization of the Tensor Data Loader Class *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005-2015: *
17  * CERN, Switzerland *
18  * U. of Victoria, Canada *
19  * MPI-K Heidelberg, Germany *
20  * U. of Bonn, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 //////////////////////////////////////////////////////////////////////////
28 // Partial specialization of the TTensorDataLoader class to adapt //
29 // it to the TMatrix class. Also the data transfer is kept simple, //
30 // since this implementation (being intended as reference and fallback) //
31 // is not optimized for performance. //
32 //////////////////////////////////////////////////////////////////////////
33 
34 #ifndef TMVA_DNN_ARCHITECTURES_REFERENCE_TENSORDATALOADER
35 #define TMVA_DNN_ARCHITECTURES_REFERENCE_TENSORDATALOADER
36 
38 #include <iostream>
39 
40 namespace TMVA {
41 namespace DNN {
42 
// Forward declaration of the reference architecture class template; it is only
// used here as the architecture tag of the specialization below.
43 template <typename AReal>
44 class TReference;
45 
/** Partial specialization of TTensorDataLoader for the TReference<AReal>
 *  architecture. The batch buffers are plain TMatrixT<AReal> objects owned by
 *  the loader; the dataset itself is only referenced (fData), not copied. */
46 template <typename AData, typename AReal>
47 class TTensorDataLoader<AData, TReference<AReal>> {
48 private:
50 
51  const AData &fData; ///< The data that should be loaded in the batches (held by reference).
52 
53  size_t fNSamples; ///< The total number of samples in the dataset.
54  size_t fBatchSize; ///< The size of a batch.
55  size_t fBatchDepth; ///< The number of matrices in the tensor.
56  size_t fBatchHeight; ///< The number of rows in each matrix.
57  size_t fBatchWidth; ///< The number of columns in each matrix.
58  size_t fNOutputFeatures; ///< The number of outputs from the classifier/regressor.
59  size_t fBatchIndex; ///< The index of the batch when there are multiple batches in parallel.
60 
61  std::vector<TMatrixT<AReal>> inputTensor; ///< The 3D tensor used to keep the input data.
62  TMatrixT<AReal> outputMatrix; ///< The matrix used to keep the output.
63  TMatrixT<AReal> weightMatrix; ///< The matrix used to keep the batch weights.
64 
65  std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.
66 
67 public:
68  /*! Constructor. The nStreams argument is accepted for interface
   *  compatibility but is not used by this specialization. */
69  TTensorDataLoader(const AData &data, size_t nSamples, size_t batchSize, size_t batchDepth, size_t batchHeight,
70  size_t batchWidth, size_t nOutputFeatures, size_t nStreams = 1);
71 
72  TTensorDataLoader(const TTensorDataLoader &) = default;
 // NOTE(review): the class has a reference member (fData), so this defaulted
 // copy assignment operator is defined as deleted by the language rules.
74  TTensorDataLoader &operator=(const TTensorDataLoader &) = default;
76 
77  /** Copy input tensor into the given host buffer. Function to be specialized by
78  * the architecture-specific backend. */
79  void CopyTensorInput(std::vector<TMatrixT<AReal>> &tensor, IndexIterator_t sampleIterator);
80  /** Copy output matrix into the given host buffer. Function to be specialized
81  * by the architecture-specific backend. */
82  void CopyTensorOutput(TMatrixT<AReal> &matrix, IndexIterator_t sampleIterator);
83  /** Copy weight matrix into the given host buffer. Function to be specialized
84  * by the architecture-specific backend. */
85  void CopyTensorWeights(TMatrixT<AReal> &matrix, IndexIterator_t sampleIterator);
86 
 // begin()/end() build batch iterators over this loader. end() is constructed
 // with fNSamples / fBatchSize, an integer division: samples beyond the last
 // whole batch are not counted, and fBatchSize must be non-zero.
87  BatchIterator_t begin() { return BatchIterator_t(*this); }
88  BatchIterator_t end() { return BatchIterator_t(*this, fNSamples / fBatchSize); }
89 
90  /** Shuffle the order of the samples in the dataset. The shuffling is indirect,
91  * i.e. only the indices are shuffled. No input data is moved by this
92  * routine. */
93  template<typename RNG>
94  void Shuffle(RNG & rng);
95 
96  /** Return the next batch from the training set. The TTensorDataLoader object
97  * keeps an internal counter that cycles over the batches in the training
98  * set. */
100 };
101 
102 //
103 // TTensorDataLoader Class.
104 //______________________________________________________________________________
// Constructor: stores the batch geometry, sizes the output matrix as
// batchSize x nOutputFeatures and the weight matrix as batchSize x 1, and then
// fills the input tensor and the sample-index list in the body.
// The last parameter (nStreams) is unnamed and ignored.
105 template <typename AData, typename AReal>
106 TTensorDataLoader<AData, TReference<AReal>>::TTensorDataLoader(const AData &data, size_t nSamples, size_t batchSize,
107  size_t batchDepth, size_t batchHeight, size_t batchWidth,
108  size_t nOutputFeatures, size_t /* nStreams */)
109  : fData(data), fNSamples(nSamples), fBatchSize(batchSize), fBatchDepth(batchDepth), fBatchHeight(batchHeight),
110  fBatchWidth(batchWidth), fNOutputFeatures(nOutputFeatures), fBatchIndex(0), inputTensor(),
111  outputMatrix(batchSize, nOutputFeatures), weightMatrix(batchSize, 1), fSampleIndices()
112 {
113 
 // Allocate batchDepth matrices, each constructed with (batchHeight, batchWidth).
114  inputTensor.reserve(batchDepth);
115  for (size_t i = 0; i < batchDepth; i++) {
116  inputTensor.emplace_back(batchHeight, batchWidth);
117  }
118 
 // Start from the identity ordering 0, 1, ..., fNSamples - 1.
119  fSampleIndices.reserve(fNSamples);
120  for (size_t i = 0; i < fNSamples; i++) {
121  fSampleIndices.push_back(i);
122  }
123 }
124 
// Shuffle: randomly permutes the whole fSampleIndices vector with std::shuffle,
// using the caller-supplied generator rng. Only indices are reordered.
// NOTE(review): the declarator line of this definition is absent from this listing.
125 template <typename AData, typename AReal>
126 template <typename RNG>
128 {
129  std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), rng);
130 }
131 
// Produces the next batch: refills the member buffers inputTensor, outputMatrix
// and weightMatrix through the CopyTensor* methods, advances the batch counter
// and returns a TTensorBatch constructed from those three buffers.
// NOTE(review): the declarator line of this definition is absent from this listing.
132 template <typename AData, typename AReal>
134 {
 // NOTE(review): fNSamples / fBatchSize is an integer division. If fBatchSize is 0
 // or fNSamples < fBatchSize, the statement below divides/reduces by zero -
 // confirm that callers rule this out.
135  fBatchIndex %= (fNSamples / fBatchSize); // Wrap the batch counter around after the last whole batch.
136 
 // Position in fSampleIndices of the first sample index of this batch.
137  size_t sampleIndex = fBatchIndex * fBatchSize;
138  IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;
139 
 // All three copies start from the same position in the sample ordering.
140  CopyTensorInput(inputTensor, sampleIndexIterator);
141  CopyTensorOutput(outputMatrix, sampleIndexIterator);
142  CopyTensorWeights(weightMatrix, sampleIndexIterator);
143 
144  fBatchIndex++;
145  return TTensorBatch<TReference<AReal>>(inputTensor, outputMatrix, weightMatrix);
146 }
147 
148 } // namespace DNN
149 } // namespace TMVA
150 
151 #endif
TMatrixT< AReal > weightMatrix
The matrix used to keep the batch weights.
std::vector< size_t > fSampleIndices
Ordering of the samples in the epoch.
void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy input tensor into the given host buffer.
The reference architecture class.
Definition: DataLoader.h:30
TTensorBatch< Architecture_t > GetTensorBatch()
Return the next batch from the training set.
size_t fBatchHeight
The number of rows in each matrix.
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42
size_t fBatchIndex
The index of the batch when there are multiple batches in parallel.
void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin)
Copy weight matrix into the given host buffer.
TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, size_t batchDepth, size_t batchHeight, size_t batchWidth, size_t nOutputFeatures, size_t nStreams=1)
Constructor.
size_t fBatchWidth
The number of columns in each matrix.
void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy output matrix into the given host buffer.
size_t fBatchDepth
The number of matrices in the tensor.
size_t fBatchIndex
The index of the batch when there are multiple batches in parallel.
size_t fNOutputFeatures
The number of outputs from the classifier/regressor.
TMatrixT< AReal > outputMatrix
The matrix used to keep the output.
const AData & fData
The data that should be loaded in the batches.
void Shuffle(RNG &rng)
Shuffle the order of the samples in the batch.
TTensorDataLoader & operator=(const TTensorDataLoader &)=default
TTensorBatchIterator< Data_t, Architecture_t > BatchIterator_t
Abstract ClassifierFactory template that handles arbitrary types.
size_t fNSamples
The total number of samples in the dataset.
size_t fNSamples
The total number of samples in the dataset.
size_t fBatchWidth
The number of columns in each matrix.
size_t fBatchSize
The size of a batch.
size_t fBatchDepth
The number of matrices in the tensor.
const Data_t & fData
The data that should be loaded in the batches.
size_t fBatchHeight
The number of rows in each matrix.
size_t fNOutputFeatures
The number of outputs from the classifier/regressor.
std::vector< TMatrixT< AReal > > inputTensor
The 3D tensor used to keep the input data.
std::vector< size_t > fSampleIndices
Ordering of the samples in the epoch.