Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN 6 January 2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
15
16#include <ROOT/RSpan.hxx>
17
18#include <RConfig.h>
19
20#ifdef ROOFIT_CUDA
22#endif
23
24#include <DllImport.h> //for R__EXTERN, needed for windows
25
26#include <initializer_list>
27#include <string>
28
29/**
30 * Namespace for dispatching RooFit computations to various backends.
31 *
32 * This namespace contains an interface for providing high-performance computation functions for use in
33 * RooAbsReal::doEval(), see RooBatchComputeInterface.
34 *
35 * Furthermore, several implementations of this interface can be created, which reside in RooBatchCompute::RF_ARCH,
36 * where RF_ARCH may be replaced by the architecture that this implementation targets, e.g. SSE, AVX, etc.
37 *
38 * Using the pointers RooBatchCompute::dispatchCPU and RooBatchCompute::dispatchCUDA, a computation request can be
39 * dispatched to the fastest backend that is available on a specific platform.
40 */
41namespace RooBatchCompute {
42
43typedef std::span<const std::span<const double>> VarSpan;
44typedef std::span<double> ArgSpan;
45typedef const double *__restrict InputArr;
46
47constexpr std::size_t bufferSize = 64;
48
49void init();
50
/// Minimal configuration struct to steer the evaluation of a single node with
/// the RooBatchCompute library.
///
/// The only setting it carries is an optional CUDA stream pointer, and only
/// when the header is compiled with ROOFIT_CUDA defined. Without ROOFIT_CUDA
/// the class has no data members and useCuda() always returns false.
class Config {
public:
#ifdef ROOFIT_CUDA
   /// \return true if the stored CUDA stream pointer is non-null.
   bool useCuda() const { return _cudaStream != nullptr; }
   /// Store the CUDA stream pointer; storing nullptr makes useCuda() return false.
   void setCudaStream(RooFit::Detail::CudaInterface::CudaStream *cudaStream) { _cudaStream = cudaStream; }
   /// \return the pointer last passed to setCudaStream(), or nullptr if it was never called.
   RooFit::Detail::CudaInterface::CudaStream *cudaStream() const { return _cudaStream; }

private:
   /// CUDA stream pointer; nullptr until setCudaStream() stores something else.
   RooFit::Detail::CudaInterface::CudaStream *_cudaStream = nullptr;
#else
   /// CUDA support is compiled out, so the CUDA path is never selected.
   bool useCuda() const { return false; }
#endif
};
66
68
110
112 double nllSum = 0.0;
113 double nllSumCarry = 0.0;
114 std::size_t nLargeValues = 0;
115 std::size_t nNonPositiveValues = 0;
116 std::size_t nNaNValues = 0;
117};
118
119/**
120 * \class RooBatchComputeInterface
121 * \ingroup Roobatchcompute
122 * \brief The interface which should be implemented to provide optimised computation functions for implementations of
123 * RooAbsReal::doEval().
124 *
125 * The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call
126 * functions from the library. The power lies in the virtual functions that can resolve to different
127 * implementations for the functionality; for example, calling a function through dispatchCuda
128 * will resolve to efficient CUDA implementations.
129 *
130 * This interface contains the signatures of the compute functions of every PDF that has an optimised implementation
131 * available. These are the functions that perform the actual computations in batches.
132 *
133 * Several implementations of this interface may be provided, e.g. SSE, AVX, AVX2 etc. At run time, the fastest
134 * implementation of this interface is selected, and using a virtual call, the computation is dispatched to the best
135 * backend.
136 *
137 * \see RooBatchCompute::dispatchCPU, RooBatchCompute::dispatchCUDA, RooBatchComputeClass, RF_ARCH
138 */
140public:
141 virtual ~RooBatchComputeInterface() = default;
142 virtual void compute(Config const &cfg, Computer, std::span<double> output, VarSpan, ArgSpan) = 0;
143
144 virtual double reduceSum(Config const &cfg, InputArr input, size_t n) = 0;
145 virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas, std::span<const double> weights,
146 std::span<const double> offsetProbas) = 0;
147
148 virtual Architecture architecture() const = 0;
149 virtual std::string architectureName() const = 0;
150};
151
152/**
153 * This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
154 * Using a virtual call, computation requests are dispatched to backends with architecture-specific functions
155 * such as SSE, AVX, AVX2, etc.
156 *
157 * \see RooBatchComputeInterface, RooBatchComputeClass, RF_ARCH
158 */
160
162{
163 init();
164 return dispatchCPU->architecture();
165}
166
167inline std::string cpuArchitectureName()
168{
169 init();
171}
172
173inline bool hasCuda()
174{
175 init();
176 return dispatchCUDA;
177}
178
179inline void compute(Config cfg, Computer comp, std::span<double> output, VarSpan vars, ArgSpan extraArgs = {})
180{
181 init();
182 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
183 dispatch->compute(cfg, comp, output, vars, extraArgs);
184}
185
186/// It is not possible to construct a std::span directly from an initializer
187/// list (probably it will be with C++26). That's why we need an explicit
188/// overload for this.
189inline void compute(Config cfg, Computer comp, std::span<double> output,
190 std::initializer_list<std::span<const double>> vars, ArgSpan extraArgs = {})
191{
192 compute(cfg, comp, output, VarSpan{vars.begin(), vars.end()}, extraArgs);
193}
194
195inline double reduceSum(Config cfg, InputArr input, size_t n)
196{
197 init();
198 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
199 return dispatch->reduceSum(cfg, input, n);
200}
201
202inline ReduceNLLOutput reduceNLL(Config cfg, std::span<const double> probas, std::span<const double> weights,
203 std::span<const double> offsetProbas)
204{
205 init();
206 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
207 return dispatch->reduceNLL(cfg, probas, weights, offsetProbas);
208}
209
210} // End namespace RooBatchCompute
211
212#endif
#define R__EXTERN
Definition DllImport.h:26
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
The interface which should be implemented to provide optimised computation functions for implementati...
virtual double reduceSum(Config const &cfg, InputArr input, size_t n)=0
virtual std::string architectureName() const =0
virtual Architecture architecture() const =0
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)=0
virtual void compute(Config const &cfg, Computer, std::span< double > output, VarSpan, ArgSpan)=0
const Int_t n
Definition legend1.C:16
Namespace for dispatching RooFit computations to various backends.
R__EXTERN RooBatchComputeInterface * dispatchCUDA
std::span< double > ArgSpan
std::string cpuArchitectureName()
void compute(Config cfg, Computer comp, std::span< double > output, VarSpan vars, ArgSpan extraArgs={})
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
constexpr std::size_t bufferSize
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
Architecture cpuArchitecture()
const double *__restrict InputArr
void init()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
std::span< const std::span< const double > > VarSpan
static void output()