The interface which should be implemented to provide optimised computation functions for implementations of RooAbsReal::evaluateSpan().
The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call functions from the library. The power lies in the virtual functions, which can resolve to different implementations of the same functionality; for example, calling a function through dispatchCuda will resolve to an efficient CUDA implementation.
This interface contains the signatures of the compute functions of every PDF that has an optimised implementation available. These are the functions that perform the actual computations in batches.
Several implementations of this interface may be provided, e.g. SSE, AVX, and AVX2. At run time, the fastest available implementation of this interface is selected, and the computation is dispatched to that backend through a virtual call.
See also: RooBatchCompute::dispatch, RooBatchComputeClass, RF_ARCH
Definition at line 107 of file RooBatchCompute.h.
|
virtual | ~RooBatchComputeInterface ()=default |
|
virtual Architecture | architecture () const =0 |
|
virtual std::string | architectureName () const =0 |
|
virtual void | compute (cudaStream_t *, Computer, RestrictArr, size_t, const VarVector &, ArgVector &)=0 |
|
void | compute (cudaStream_t *stream, Computer comp, RestrictArr output, size_t size, const VarVector &vars) |
|
virtual float | cudaEventElapsedTime (cudaEvent_t *, cudaEvent_t *) |
|
virtual void | cudaEventRecord (cudaEvent_t *, cudaStream_t *) |
|
virtual void | cudaFree (void *) |
|
virtual void | cudaFreeHost (void *) |
|
virtual void * | cudaMalloc (size_t) |
|
virtual void * | cudaMallocHost (size_t) |
|
virtual void | cudaStreamWaitEvent (cudaStream_t *, cudaEvent_t *) |
|
virtual void | deleteCudaEvent (cudaEvent_t *) |
|
virtual void | deleteCudaStream (cudaStream_t *) |
|
virtual void | memcpyToCPU (void *, const void *, size_t, cudaStream_t *=nullptr) |
|
virtual void | memcpyToCUDA (void *, const void *, size_t, cudaStream_t *=nullptr) |
|
virtual cudaEvent_t * | newCudaEvent (bool) |
|
virtual cudaStream_t * | newCudaStream () |
|
virtual ReduceNLLOutput | reduceNLL (cudaStream_t *, RooSpan< const double > probas, RooSpan< const double > weightSpan, RooSpan< const double > weights, double weightSum, RooSpan< const double > binVolumes)=0 |
|
virtual double | reduceSum (cudaStream_t *, InputArr input, size_t n)=0 |
|
virtual bool | streamIsActive (cudaStream_t *) |
|