Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN 6 January 2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
15
17
18#include "DllImport.h" //for R__EXTERN, needed for windows
19#include "TError.h"
20
21#include <functional>
22
23/**
24 * Namespace for dispatching RooFit computations to various backends.
25 *
26 * This namespace contains an interface for providing high-performance computation functions for use in RooAbsReal::evaluateSpan(),
27 * see RooBatchComputeInterface.
28 *
29 * Furthermore, several implementations of this interface can be created, which reside in RooBatchCompute::RF_ARCH, where
30 * RF_ARCH may be replaced by the architecture that this implementation targets, e.g. SSE, AVX, etc.
31 *
32 * Using the pointers RooBatchCompute::dispatchCPU and RooBatchCompute::dispatchCUDA, a computation request can be dispatched to the fastest backend that is available
33 * on a specific platform.
34 */
35namespace RooBatchCompute {
36
38
42
43/**
44 * \class RooBatchComputeInterface
45 * \ingroup Roobatchcompute
46 * \brief The interface which should be implemented to provide optimised computation functions for implementations of RooAbsReal::evaluateSpan().
47 *
48 * The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call
49 * functions from the library. The power lies in the virtual functions that can resolve to different
50 * implementations for the functionality; for example, calling a function through dispatchCUDA
51 * will resolve to efficient CUDA implementations.
52 *
53 * This interface contains the signatures of the compute functions of every PDF that has an optimised implementation available.
54 * These are the functions that perform the actual computations in batches.
55 *
56 * Several implementations of this interface may be provided, e.g. SSE, AVX, AVX2 etc. At run time, the fastest implementation of this interface
57 * is selected, and using a virtual call, the computation is dispatched to the best backend.
58 *
59 * \see RooBatchCompute::dispatchCPU, RooBatchCompute::dispatchCUDA, RooBatchComputeClass, RF_ARCH
60 */
62 public:
63 virtual ~RooBatchComputeInterface() = default;
64 virtual void compute(cudaStream_t*, Computer, RestrictArr, size_t, const VarVector&, const ArgVector& ={}) = 0;
65 virtual double sumReduce(cudaStream_t*, InputArr input, size_t n) = 0;
66 virtual Architecture architecture() const = 0;
67 virtual std::string architectureName() const = 0;
68
69 // CUDA functions that need to be interfaced; the default implementations below throw std::bad_function_call
70 virtual void* cudaMalloc(size_t) { throw std::bad_function_call(); }
71 virtual void cudaFree(void*) { throw std::bad_function_call(); }
72 virtual void* cudaMallocHost(size_t) { throw std::bad_function_call(); }
73 virtual void cudaFreeHost(void*) { throw std::bad_function_call(); }
74 virtual cudaEvent_t* newCudaEvent(bool /*forTiming*/) { throw std::bad_function_call(); }
75 virtual void deleteCudaEvent(cudaEvent_t*) { throw std::bad_function_call(); }
76 virtual cudaStream_t* newCudaStream() { throw std::bad_function_call(); }
77 virtual void deleteCudaStream(cudaStream_t*) { throw std::bad_function_call(); }
78 virtual bool streamIsActive(cudaStream_t*) { throw std::bad_function_call(); }
79 virtual void cudaEventRecord(cudaEvent_t*, cudaStream_t*) { throw std::bad_function_call(); }
80 virtual void cudaStreamWaitEvent(cudaStream_t*, cudaEvent_t*) { throw std::bad_function_call(); }
81 virtual float cudaEventElapsedTime(cudaEvent_t*, cudaEvent_t*) { throw std::bad_function_call(); }
82 virtual void memcpyToCUDA(void*, const void*, size_t, cudaStream_t* =nullptr) { throw std::bad_function_call(); }
83 virtual void memcpyToCPU (void*, const void*, size_t, cudaStream_t* =nullptr) { throw std::bad_function_call(); }
84};
85
86/**
87 * The dispatch pointers dispatchCPU and dispatchCUDA each point to an implementation of the compute library, provided one has been loaded.
88 * Using a virtual call, computation requests are dispatched to backends with architecture-specific functions
89 * such as SSE, AVX, AVX2, etc.
90 *
91 * \see RooBatchComputeInterface, RooBatchComputeClass, RF_ARCH
92 */
94} // End namespace RooBatchCompute
95
96#endif
#define R__EXTERN
Definition DllImport.h:27
The interface which should be implemented to provide optimised computation functions for implementati...
virtual void cudaEventRecord(cudaEvent_t *, cudaStream_t *)
virtual cudaEvent_t * newCudaEvent(bool)
virtual void compute(cudaStream_t *, Computer, RestrictArr, size_t, const VarVector &, const ArgVector &={})=0
virtual std::string architectureName() const =0
virtual float cudaEventElapsedTime(cudaEvent_t *, cudaEvent_t *)
virtual void memcpyToCPU(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual double sumReduce(cudaStream_t *, InputArr input, size_t n)=0
virtual void cudaStreamWaitEvent(cudaStream_t *, cudaEvent_t *)
virtual bool streamIsActive(cudaStream_t *)
virtual void deleteCudaStream(cudaStream_t *)
virtual void deleteCudaEvent(cudaEvent_t *)
virtual void memcpyToCUDA(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual Architecture architecture() const =0
const Int_t n
Definition legend1.C:16
Namespace for dispatching RooFit computations to various backends.
std::vector< RooSpan< const double > > VarVector
R__EXTERN RooBatchComputeInterface * dispatchCUDA
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr