doc/v616/DNN_2DataLoader_8h_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 08/08/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


/////////////////////////////////////////////////////////////////////

// Generic data loader for neural network input data. Provides a   //

// high level abstraction for the transfer of training data to the //

// device.                                                         //

/////////////////////////////////////////////////////////////////////


#ifndef TMVA_DNN_DATALOADER

#define TMVA_DNN_DATALOADER


#include "TMatrix.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <iostream>
#include <random>
#include <tuple>
#include <vector>


namespace TMVA {


class DataSetInfo;


namespace DNN  {


//

// Input Data Types

//______________________________________________________________________________

/// Input supplied as three ROOT matrices, held by const reference inside a tuple.
/// NOTE(review): presumably ordered (input, output, weights); the order is not
/// visible in this header -- confirm against the CopyInput / CopyOutput /
/// CopyWeights specializations.
using MatrixInput_t = std::tuple<const TMatrixT<Double_t> &,
                                 const TMatrixT<Double_t> &,
                                 const TMatrixT<Double_t> &>;

/// Input supplied as a vector of TMVA event pointers together with a
/// DataSetInfo, both held by const reference inside a tuple.
using TMVAInput_t = std::tuple<const std::vector<Event *> &, const DataSetInfo &>;

/// Iterator into a vector of sample indices; TDataLoader uses it to point at the
/// first sample index of a batch.
using IndexIterator_t = std::vector<size_t>::iterator;


/** TBatch
 *
 * Value type bundling the three matrices that make up one training batch:
 * the input data, the output data and the event weights. Each matrix is stored
 * by value as an AArchitecture::Matrix_t and is handed out by (mutable)
 * reference through GetInput(), GetOutput() and GetWeights().
 *
 * \tparam AArchitecture The underlying architecture; it must provide a nested
 *                       Matrix_t type.
 */
//______________________________________________________________________________
template <typename AArchitecture>
class TBatch
{
private:
   using Matrix_t = typename AArchitecture::Matrix_t;

   Matrix_t fInputMatrix;  ///< Input data of the batch.
   Matrix_t fOutputMatrix; ///< Output data of the batch.
   Matrix_t fWeightMatrix; ///< Event weights of the batch.

public:
   /** Build a batch from an input, an output and a weight matrix, in that order. */
   TBatch(Matrix_t &inputMatrix, Matrix_t &outputMatrix, Matrix_t &weightMatrix);

   // Copying and moving are the compiler-generated member-wise operations.
   TBatch(const TBatch &) = default;
   TBatch(TBatch &&) = default;
   TBatch &operator=(const TBatch &) = default;
   TBatch &operator=(TBatch &&) = default;

   /** Return the matrix representing the input data. */
   Matrix_t &GetInput()
   {
      return fInputMatrix;
   }

   /** Return the matrix representing the output data. */
   Matrix_t &GetOutput()
   {
      return fOutputMatrix;
   }

   /** Return the matrix holding the event weights. */
   Matrix_t &GetWeights()
   {
      return fWeightMatrix;
   }
};


template<typename Data_t, typename AArchitecture> class TDataLoader;

/** TBatchIterator
 *
 * Simple iterator class for the iteration over the training batches in
 * a given data set represented by a TDataLoader object.
 *
 * The iterator only counts batches: dereferencing it asks the data loader for
 * its next batch via TDataLoader::GetBatch(), which is driven by the loader's
 * own internal counter rather than by this iterator's index. Two iterators
 * compare unequal when their batch indices differ; the loader they refer to is
 * not part of the comparison.
 *
 * The iterator stores a reference to the data loader, so the loader must
 * outlive it.
 *
 * \tparam Data_t        The input data type.
 * \tparam AArchitecture The underlying architecture type.
 */
template<typename Data_t, typename AArchitecture>
class TBatchIterator
{
private:

   TDataLoader<Data_t, AArchitecture> & fDataLoader; ///< Loader producing the batches.
   size_t fBatchIndex;                               ///< Number of batches stepped over so far.

public:

   /** Create an iterator over the batches of \p dataLoader positioned at batch
    *  number \p index. */
   TBatchIterator(TDataLoader<Data_t, AArchitecture> & dataLoader, size_t index = 0)
      : fDataLoader(dataLoader), fBatchIndex(index)
   {
      // Nothing to do here.
   }

   /** Return the next batch produced by the data loader. */
   TBatch<AArchitecture> operator*() {return fDataLoader.GetBatch();}

   /** Pre-increment: advance to the next batch and return this iterator by
    *  reference (the conventional signature; avoids copying the iterator). */
   TBatchIterator & operator++() {++fBatchIndex; return *this;}

   /** Compare batch positions. Const-qualified so that const iterators can be
    *  compared as well. */
   bool operator!=(const TBatchIterator & other) const {
      return fBatchIndex != other.fBatchIndex;
   }
};


/** TDataLoader
 *
 * Service class managing the streaming of the training data from the input data
 * type to the accelerator device or the CPU. A TDataLoader object manages a number
 * of host and device buffer pairs that are used in a round-robin manner for the
 * transfer of batches to the device.
 *
 * Each TDataLoader object has an associated batch size and a total number of
 * samples in the dataset. One epoch consists of fNSamples / fBatchSize batches
 * (integer division). The begin() and end() member functions allow the user to
 * iterate over the batches of one epoch.
 *
 * The loader stores a reference to the input data, so the data must outlive the
 * loader. Because of that reference member a loader can be copy- and
 * move-constructed but not assigned.
 *
 * \tparam Data_t        The input data type.
 * \tparam AArchitecture The architecture class of the underlying architecture.
 */
template<typename Data_t, typename AArchitecture>
class TDataLoader
{
private:

   using HostBuffer_t    = typename AArchitecture::HostBuffer_t;
   using DeviceBuffer_t  = typename AArchitecture::DeviceBuffer_t;
   using Matrix_t        = typename AArchitecture::Matrix_t;
   using BatchIterator_t = TBatchIterator<Data_t, AArchitecture>;

   const Data_t &fData;                         ///< Non-owning reference to the input data.

   size_t fNSamples;                            ///< Total number of samples in the data set.
   size_t fBatchSize;                           ///< Number of samples per batch.
   size_t fNInputFeatures;                      ///< Number of columns of the input matrix.
   size_t fNOutputFeatures;                     ///< Number of columns of the output matrix.
   size_t fBatchIndex;                          ///< Index of the next batch returned by GetBatch().

   size_t fNStreams;                            ///< Number of buffer pairs.
   std::vector<DeviceBuffer_t> fDeviceBuffers;  ///< One device buffer per stream.
   std::vector<HostBuffer_t>   fHostBuffers;    ///< One host buffer per stream.

   std::vector<size_t> fSampleIndices;          ///< Ordering of the samples in the epoch.

public:

   /** Create a loader over \p data with \p nSamples samples, served in batches of
    *  \p batchSize samples, using \p nStreams host/device buffer pairs. */
   TDataLoader(const Data_t & data, size_t nSamples, size_t batchSize,
               size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams = 1);
   TDataLoader(const TDataLoader  &) = default;
   TDataLoader(      TDataLoader &&) = default;
   // The reference member fData cannot be re-seated, so defaulted assignment
   // operators would be implicitly deleted anyway; say so explicitly.
   TDataLoader & operator=(const TDataLoader  &) = delete;
   TDataLoader & operator=(      TDataLoader &&) = delete;

   /** Copy input matrix into the given host buffer. Function to be specialized by
    *  the architecture-specific backend. */
   void  CopyInput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
   /** Copy output matrix into the given host buffer. Function to be specialized
    * by the architecture-specific backend. */
   void CopyOutput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
   /** Copy weight matrix into the given host buffer. Function to be specialized
    * by the architecture-specific backend. */
   void CopyWeights(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);

   /** Iterator positioned at the first batch of the epoch. */
   BatchIterator_t begin() {return TBatchIterator<Data_t, AArchitecture>(*this);}
   /** Iterator positioned one past the last complete batch of the epoch, i.e. at
    *  batch number fNSamples / fBatchSize. */
   BatchIterator_t end()
   {
      return TBatchIterator<Data_t, AArchitecture>(*this, fNSamples / fBatchSize);
   }

   /** Shuffle the order of the samples in the batch. The shuffling is indirect,
    *  i.e. only the indices are shuffled. No input data is moved by this
    * routine. */
   void Shuffle();

   /** Return the next batch from the training set. The TDataLoader object
    *  keeps an internal counter that cycles over the batches in the training
    *  set. */
   TBatch<AArchitecture> GetBatch();

};


//

// TBatch Class.

//______________________________________________________________________________

/** Construct a batch whose three matrix members are copy-constructed from the
 *  given input, output and weight matrices. */
template <typename AArchitecture>
TBatch<AArchitecture>::TBatch(Matrix_t &inputMatrix, Matrix_t &outputMatrix, Matrix_t &weightMatrix)
   : fInputMatrix(inputMatrix),
     fOutputMatrix(outputMatrix),
     fWeightMatrix(weightMatrix)
{
   // All work is done by the member initializers.
}


//

// TDataLoader Class.

//______________________________________________________________________________

/** Set up the loader: allocate one host buffer and one device buffer per stream
 *  and initialise the sample ordering to 0, 1, ..., nSamples - 1. Every buffer is
 *  sized to hold the input, output and weight blocks of a single batch. */
template<typename Data_t, typename AArchitecture>
TDataLoader<Data_t, AArchitecture>::TDataLoader(
    const Data_t & data, size_t nSamples, size_t batchSize,
    size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams)
    : fData(data), fNSamples(nSamples), fBatchSize(batchSize),
      fNInputFeatures(nInputFeatures), fNOutputFeatures(nOutputFeatures),
      fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(), fHostBuffers(),
      fSampleIndices()
{
   // Element count of one per-stream buffer: the three blocks of a batch.
   const size_t bufferSize = fBatchSize * fNInputFeatures    // input block
                             + fBatchSize * fNOutputFeatures // output block
                             + fBatchSize;                   // one weight per sample

   // One host/device buffer pair per stream, host buffer first.
   size_t stream = 0;
   while (stream < fNStreams) {
      fHostBuffers.push_back(HostBuffer_t(bufferSize));
      fDeviceBuffers.push_back(DeviceBuffer_t(bufferSize));
      ++stream;
   }

   // Start from the identity ordering; each new entry equals its own position.
   fSampleIndices.reserve(fNSamples);
   while (fSampleIndices.size() < fNSamples) {
      fSampleIndices.push_back(fSampleIndices.size());
   }
}


//______________________________________________________________________________

/** Assemble the next batch and advance the internal batch counter.
 *
 * The samples of the batch are staged in the host buffer of the selected stream
 * (input block, then output block, then weight block), transferred to the
 * paired device buffer with a single CopyFrom() call, and exposed as matrices
 * built on the corresponding device sub-buffers.
 *
 * Preconditions (not checked here): fBatchSize and fNStreams are non-zero and
 * fNSamples >= fBatchSize; otherwise one of the modulo/division operations
 * below has a zero divisor.
 *
 * \return A TBatch holding the input (fBatchSize x fNInputFeatures), output
 *         (fBatchSize x fNOutputFeatures) and weight (fBatchSize x 1) matrices.
 */
template<typename Data_t, typename AArchitecture>
TBatch<AArchitecture> TDataLoader<Data_t, AArchitecture>::GetBatch()
{
   fBatchIndex %= (fNSamples / fBatchSize); // Cycle through samples.

   size_t inputMatrixSize  = fBatchSize * fNInputFeatures;
   size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
   size_t weightMatrixSize = fBatchSize;

   // Buffer pairs are used round-robin, selected by the batch index.
   size_t streamIndex = fBatchIndex % fNStreams;
   HostBuffer_t   & hostBuffer   = fHostBuffers[streamIndex];
   DeviceBuffer_t & deviceBuffer = fDeviceBuffers[streamIndex];

   // Carve the three blocks out of the host buffer.
   // NOTE(review): the sub-buffers are presumably views aliasing the parent
   // buffer -- the whole-buffer CopyFrom() below relies on that; confirm
   // against the backend's GetSubBuffer().
   HostBuffer_t inputHostBuffer  = hostBuffer.GetSubBuffer(0, inputMatrixSize);
   HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputMatrixSize,
                                                           outputMatrixSize);
   HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputMatrixSize + outputMatrixSize, weightMatrixSize);

   // Same layout on the device side.
   DeviceBuffer_t inputDeviceBuffer  = deviceBuffer.GetSubBuffer(0, inputMatrixSize);
   DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputMatrixSize,
                                                                 outputMatrixSize);
   DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputMatrixSize + outputMatrixSize, weightMatrixSize);

   // First sample index of this batch within the (possibly shuffled) ordering.
   size_t sampleIndex = fBatchIndex * fBatchSize;
   IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;

   CopyInput(inputHostBuffer,   sampleIndexIterator, fBatchSize);
   CopyOutput(outputHostBuffer, sampleIndexIterator, fBatchSize);
   CopyWeights(weightHostBuffer, sampleIndexIterator, fBatchSize);

   deviceBuffer.CopyFrom(hostBuffer);
   Matrix_t  inputMatrix(inputDeviceBuffer,  fBatchSize, fNInputFeatures);
   Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
   // The weight sub-buffer holds exactly fBatchSize elements (one weight per
   // sample), so the matrix on top of it must be fBatchSize x 1. Declaring it
   // with fNOutputFeatures columns would describe storage past the end of the
   // buffer whenever fNOutputFeatures > 1.
   Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, 1);

   fBatchIndex++;
   return TBatch<AArchitecture>(inputMatrix, outputMatrix, weightMatrix);
}


//______________________________________________________________________________

/** Randomly permute the sample indices in place; the input data itself is not
 *  touched. */
template<typename Data_t, typename AArchitecture>
void TDataLoader<Data_t, AArchitecture>::Shuffle()
{
   // A new default-seeded engine is created on every call, so every call applies
   // the same permutation to whatever ordering is current.
   // NOTE(review): if independent shuffles per epoch are wanted, the engine
   // would have to persist between calls -- confirm the intended behavior.
   std::default_random_engine engine{};
   std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), engine);
}


} // namespace DNN

} // namespace TMVA


#endif

Event.h

TMatrix.h

TMVA::DNN::TBatchIterator
TBatchIterator.
Definition: DataLoader.h:91

TMVA::DNN::TBatchIterator::fDataLoader
TDataLoader< Data_t, AArchitecture > & fDataLoader
Definition: DataLoader.h:94

TMVA::DNN::TBatchIterator::operator*
TBatch< AArchitecture > operator*()
Definition: DataLoader.h:105

TMVA::DNN::TBatchIterator::fBatchIndex
size_t fBatchIndex
Definition: DataLoader.h:95

TMVA::DNN::TBatchIterator::operator!=
bool operator!=(const TBatchIterator &other)
Definition: DataLoader.h:107

TMVA::DNN::TBatchIterator::TBatchIterator
TBatchIterator(TDataLoader< Data_t, AArchitecture > &dataLoader, size_t index=0)
Definition: DataLoader.h:99

TMVA::DNN::TBatchIterator::operator++
TBatchIterator operator++()
Definition: DataLoader.h:106

TMVA::DNN::TBatch
TBatch.
Definition: DataLoader.h:55

TMVA::DNN::TBatch::fInputMatrix
Matrix_t fInputMatrix
Definition: DataLoader.h:60

TMVA::DNN::TBatch::TBatch
TBatch(const TBatch &)=default

TMVA::DNN::TBatch::fOutputMatrix
Matrix_t fOutputMatrix
Definition: DataLoader.h:61

TMVA::DNN::TBatch::TBatch
TBatch(Matrix_t &, Matrix_t &, Matrix_t &)
Definition: DataLoader.h:192

TMVA::DNN::TBatch::fWeightMatrix
Matrix_t fWeightMatrix
Definition: DataLoader.h:62

TMVA::DNN::TBatch::GetInput
Matrix_t & GetInput()
Return the matrix representing the input data.
Definition: DataLoader.h:72

TMVA::DNN::TBatch::GetOutput
Matrix_t & GetOutput()
Return the matrix representing the output data.
Definition: DataLoader.h:74

TMVA::DNN::TBatch::operator=
TBatch & operator=(const TBatch &)=default

TMVA::DNN::TBatch::GetWeights
Matrix_t & GetWeights()
Return the matrix holding the event weights.
Definition: DataLoader.h:76

TMVA::DNN::TBatch::TBatch
TBatch(TBatch &&)=default

TMVA::DNN::TBatch::operator=
TBatch & operator=(TBatch &&)=default

TMVA::DNN::TBatch::Matrix_t
typename AArchitecture::Matrix_t Matrix_t
Definition: DataLoader.h:58

TMVA::DNN::TDataLoader
TDataLoader.
Definition: DataLoader.h:129

TMVA::DNN::TDataLoader::end
BatchIterator_t end()
Definition: DataLoader.h:171

TMVA::DNN::TDataLoader::fData
const Data_t & fData
Definition: DataLoader.h:137

TMVA::DNN::TDataLoader::TDataLoader
TDataLoader(TDataLoader &&)=default

TMVA::DNN::TDataLoader::CopyInput
void CopyInput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize)
Copy input matrix into the given host buffer.

TMVA::DNN::TDataLoader::begin
BatchIterator_t begin()
Definition: DataLoader.h:170

TMVA::DNN::TDataLoader::fSampleIndices
std::vector< size_t > fSampleIndices
Ordering of the samples in the epoch.
Definition: DataLoader.h:149

TMVA::DNN::TDataLoader::fNSamples
size_t fNSamples
Definition: DataLoader.h:139

TMVA::DNN::TDataLoader::fNInputFeatures
size_t fNInputFeatures
Definition: DataLoader.h:141

TMVA::DNN::TDataLoader::fHostBuffers
std::vector< HostBuffer_t > fHostBuffers
Definition: DataLoader.h:147

TMVA::DNN::TDataLoader::GetBatch
TBatch< AArchitecture > GetBatch()
Return the next batch from the training set.
Definition: DataLoader.h:228

TMVA::DNN::TDataLoader::DeviceBuffer_t
typename AArchitecture::DeviceBuffer_t DeviceBuffer_t
Definition: DataLoader.h:133

TMVA::DNN::TDataLoader::CopyOutput
void CopyOutput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize)
Copy output matrix into the given host buffer.

TMVA::DNN::TDataLoader::operator=
TDataLoader & operator=(TDataLoader &&)=default

TMVA::DNN::TDataLoader::fDeviceBuffers
std::vector< DeviceBuffer_t > fDeviceBuffers
Definition: DataLoader.h:146

TMVA::DNN::TDataLoader::operator=
TDataLoader & operator=(const TDataLoader &)=default

TMVA::DNN::TDataLoader::Matrix_t
typename AArchitecture::Matrix_t Matrix_t
Definition: DataLoader.h:134

TMVA::DNN::TDataLoader::fNStreams
size_t fNStreams
Number of buffer pairs.
Definition: DataLoader.h:145

TMVA::DNN::TDataLoader::TDataLoader
TDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams=1)
Definition: DataLoader.h:202

TMVA::DNN::TDataLoader::fBatchIndex
size_t fBatchIndex
Definition: DataLoader.h:143

TMVA::DNN::TDataLoader::fBatchSize
size_t fBatchSize
Definition: DataLoader.h:140

TMVA::DNN::TDataLoader::HostBuffer_t
typename AArchitecture::HostBuffer_t HostBuffer_t
Definition: DataLoader.h:132

TMVA::DNN::TDataLoader::Shuffle
void Shuffle()
Shuffle the order of the samples in the batch.
Definition: DataLoader.h:269

TMVA::DNN::TDataLoader::fNOutputFeatures
size_t fNOutputFeatures
Definition: DataLoader.h:142

TMVA::DNN::TDataLoader::CopyWeights
void CopyWeights(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize)
Copy weight matrix into the given host buffer.

TMVA::DNN::TDataLoader::TDataLoader
TDataLoader(const TDataLoader &)=default

TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:60

TMatrixT< Double_t >

ApplicationClassificationKeras.data
data
Definition: ApplicationClassificationKeras.py:17

TMVA::DNN::IndexIterator_t
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42

TMVA::DNN::TMVAInput_t
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40

TMVA::DNN::MatrixInput_t
std::tuple< const TMatrixT< Double_t > &, const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > MatrixInput_t
Definition: DataLoader.h:38

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21