13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
21#include <initializer_list>
39namespace CudaInterface {
44typedef std::span<const std::span<const double>>
VarSpan;
138 virtual std::unique_ptr<AbsBuffer>
215 std::initializer_list<std::span<const double>> vars,
ArgSpan extraArgs = {})
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
virtual std::unique_ptr< AbsBuffer > makeScalarBuffer()=0
virtual ~AbsBufferManager()=default
virtual std::unique_ptr< AbsBuffer > makeCpuBuffer(std::size_t size)=0
virtual std::unique_ptr< AbsBuffer > makeGpuBuffer(std::size_t size)=0
virtual std::unique_ptr< AbsBuffer > makePinnedBuffer(std::size_t size, CudaInterface::CudaStream *stream=nullptr)=0
virtual double const * deviceReadPtr() const =0
virtual ~AbsBuffer()=default
virtual void assignFromHost(std::span< const double > input)=0
virtual double const * hostReadPtr() const =0
virtual double * deviceWritePtr()=0
virtual void assignFromDevice(std::span< const double > input)=0
virtual double * hostWritePtr()=0
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute library.
void setCudaStream(CudaInterface::CudaStream *cudaStream)
CudaInterface::CudaStream * _cudaStream
CudaInterface::CudaStream * cudaStream() const
The interface which should be implemented to provide optimised computation functions for implementati...
virtual double reduceSum(Config const &cfg, InputArr input, size_t n)=0
virtual std::string architectureName() const =0
virtual void deleteCudaEvent(CudaInterface::CudaEvent *) const =0
virtual CudaInterface::CudaEvent * newCudaEvent(bool forTiming) const =0
virtual void cudaEventRecord(CudaInterface::CudaEvent *, CudaInterface::CudaStream *) const =0
virtual std::unique_ptr< AbsBufferManager > createBufferManager() const =0
virtual void cudaStreamWaitForEvent(CudaInterface::CudaStream *, CudaInterface::CudaEvent *) const =0
virtual CudaInterface::CudaStream * newCudaStream() const =0
virtual void deleteCudaStream(CudaInterface::CudaStream *) const =0
virtual bool cudaStreamIsActive(CudaInterface::CudaStream *) const =0
virtual Architecture architecture() const =0
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)=0
virtual void compute(Config const &cfg, Computer, std::span< double > output, VarSpan, ArgSpan)=0
virtual ~RooBatchComputeInterface()=default
Namespace for dispatching RooFit computations to various backends.
R__EXTERN RooBatchComputeInterface * dispatchCUDA
std::span< double > ArgSpan
std::string cpuArchitectureName()
void compute(Config cfg, Computer comp, std::span< double > output, VarSpan vars, ArgSpan extraArgs={})
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
constexpr std::size_t bufferSize
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
Architecture cpuArchitecture()
const double *__restrict InputArr
std::span< const std::span< const double > > VarSpan
int initCPU()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
std::size_t nInfiniteValues
std::size_t nNonPositiveValues