#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
#include <initializer_list>
namespace CudaInterface {
 
 
typedef std::span<const std::span<const double>> VarSpan;
 
  138   virtual std::unique_ptr<AbsBuffer>
 
 
  215                    std::initializer_list<std::span<const double>> vars, 
ArgSpan extraArgs = {})
 
 
 
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
 
virtual std::unique_ptr< AbsBuffer > makeScalarBuffer()=0
 
virtual ~AbsBufferManager()=default
 
virtual std::unique_ptr< AbsBuffer > makeCpuBuffer(std::size_t size)=0
 
virtual std::unique_ptr< AbsBuffer > makeGpuBuffer(std::size_t size)=0
 
virtual std::unique_ptr< AbsBuffer > makePinnedBuffer(std::size_t size, CudaInterface::CudaStream *stream=nullptr)=0
 
virtual double const * deviceReadPtr() const =0
 
virtual ~AbsBuffer()=default
 
virtual void assignFromHost(std::span< const double > input)=0
 
virtual double const * hostReadPtr() const =0
 
virtual double * deviceWritePtr()=0
 
virtual void assignFromDevice(std::span< const double > input)=0
 
virtual double * hostWritePtr()=0
 
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute library.
 
void setCudaStream(CudaInterface::CudaStream *cudaStream)
 
CudaInterface::CudaStream * _cudaStream
 
CudaInterface::CudaStream * cudaStream() const
 
The interface which should be implemented to provide optimised computation functions for implementati...
 
virtual double reduceSum(Config const &cfg, InputArr input, size_t n)=0
 
virtual std::string architectureName() const =0
 
virtual void deleteCudaEvent(CudaInterface::CudaEvent *) const =0
 
virtual CudaInterface::CudaEvent * newCudaEvent(bool forTiming) const =0
 
virtual void cudaEventRecord(CudaInterface::CudaEvent *, CudaInterface::CudaStream *) const =0
 
virtual std::unique_ptr< AbsBufferManager > createBufferManager() const =0
 
virtual void cudaStreamWaitForEvent(CudaInterface::CudaStream *, CudaInterface::CudaEvent *) const =0
 
virtual CudaInterface::CudaStream * newCudaStream() const =0
 
virtual void deleteCudaStream(CudaInterface::CudaStream *) const =0
 
virtual bool cudaStreamIsActive(CudaInterface::CudaStream *) const =0
 
virtual Architecture architecture() const =0
 
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)=0
 
virtual void compute(Config const &cfg, Computer, std::span< double > output, VarSpan, ArgSpan)=0
 
virtual ~RooBatchComputeInterface()=default
 
Namespace for dispatching RooFit computations to various backends.
 
R__EXTERN RooBatchComputeInterface * dispatchCUDA
 
std::span< double > ArgSpan
 
std::string cpuArchitectureName()
 
void compute(Config cfg, Computer comp, std::span< double > output, VarSpan vars, ArgSpan extraArgs={})
 
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
 
constexpr std::size_t bufferSize
 
double reduceSum(Config cfg, InputArr input, size_t n)
 
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
 
Architecture cpuArchitecture()
 
const double *__restrict InputArr
 
std::span< const std::span< const double > > VarSpan
 
int initCPU()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
 
std::size_t nNonPositiveValues