Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN 6 January 2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
15
16#include <ROOT/RSpan.hxx>
17
18#include <RConfig.h>
19
20#ifdef ROOFIT_CUDA
22#endif
23
24#include <DllImport.h> //for R__EXTERN, needed for windows
25
26#include <cassert>
27#include <functional>
28#include <string>
29#include <vector>
30
31/**
32 * Namespace for dispatching RooFit computations to various backends.
33 *
34 * This namespace contains an interface for providing high-performance computation functions for use in
35 * RooAbsReal::computeBatch(), see RooBatchComputeInterface.
36 *
37 * Furthermore, several implementations of this interface can be created, which reside in RooBatchCompute::RF_ARCH,
38 * where RF_ARCH may be replaced by the architecture that this implementation targets, e.g. SSE, AVX, etc.
39 *
40 * Using the pointer RooBatchCompute::dispatch, a computation request can be dispatched to the fastest backend that is
41 * available on a specific platform.
42 */
43namespace RooBatchCompute {
44
45typedef std::vector<std::span<const double>> VarVector;
46typedef std::vector<double> ArgVector;
47typedef double *__restrict RestrictArr;
48typedef const double *__restrict InputArr;
49
50void init();
51
/// Small configuration object that steers how a single node is evaluated with
/// the RooBatchCompute library.
///
/// When ROOFIT_CUDA is defined, the object stores an optional CUDA stream
/// pointer, and useCuda() reports whether such a stream has been set. Without
/// ROOFIT_CUDA, useCuda() always returns false.
class Config {
#ifdef ROOFIT_CUDA
private:
   /// Stream used for CUDA evaluation; a null pointer means CUDA is not used.
   RooFit::Detail::CudaInterface::CudaStream *_cudaStream = nullptr;

public:
   /// Returns the currently stored CUDA stream pointer (may be null).
   RooFit::Detail::CudaInterface::CudaStream *cudaStream() const { return _cudaStream; }
   /// Stores the CUDA stream pointer to be used for the evaluation.
   void setCudaStream(RooFit::Detail::CudaInterface::CudaStream *cudaStream) { _cudaStream = cudaStream; }
   /// True if a non-null CUDA stream has been set.
   bool useCuda() const { return cudaStream() != nullptr; }
#else
public:
   /// CUDA support is compiled out, so CUDA is never used.
   bool useCuda() const { return false; }
#endif
};
67
69
111
113 double nllSum = 0.0;
114 double nllSumCarry = 0.0;
115 std::size_t nLargeValues = 0;
116 std::size_t nNonPositiveValues = 0;
117 std::size_t nNaNValues = 0;
118};
119
120/**
121 * \class RooBatchComputeInterface
122 * \ingroup Roobatchcompute
123 * \brief The interface which should be implemented to provide optimised computation functions for implementations of
124 * RooAbsReal::computeBatch().
125 *
126 * The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call
127 * functions from the library. The power lies in the virtual functions that can resolve to different
128 * implementations for the functionality; for example, calling a function through dispatchCuda
129 * will resolve to efficient CUDA implementations.
130 *
131 * This interface contains the signatures of the compute functions of every PDF that has an optimised implementation
132 * available. These are the functions that perform the actual computations in batches.
133 *
134 * Several implementations of this interface may be provided, e.g. SSE, AVX, AVX2 etc. At run time, the fastest
135 * implementation of this interface is selected, and using a virtual call, the computation is dispatched to the best
136 * backend.
137 *
138 * \see RooBatchCompute::dispatch, RooBatchComputeClass, RF_ARCH
139 */
141public:
142 virtual ~RooBatchComputeInterface() = default;
143 virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, const VarVector &, ArgVector &) = 0;
144 inline void compute(Config const &cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
145 {
146 ArgVector extraArgs{};
147 compute(cfg, comp, output, size, vars, extraArgs);
148 }
149
150 virtual double reduceSum(Config const &cfg, InputArr input, size_t n) = 0;
151 virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas, std::span<const double> weights,
152 std::span<const double> offsetProbas) = 0;
153
154 virtual Architecture architecture() const = 0;
155 virtual std::string architectureName() const = 0;
156};
157
158/**
159 * This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
160 * Using a virtual call, computation requests are dispatched to backends with architecture-specific functions
161 * such as SSE, AVX, AVX2, etc.
162 *
163 * \see RooBatchComputeInterface, RooBatchComputeClass, RF_ARCH
164 */
166
168{
169 init();
170 return dispatchCPU->architecture();
171}
172
173inline std::string cpuArchitectureName()
174{
175 init();
177}
178
179inline bool hasCuda()
180{
181 init();
182 return dispatchCUDA;
183}
184
185inline void
186compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars, ArgVector &extraArgs)
187{
188 init();
189 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
190 dispatch->compute(cfg, comp, output, size, vars, extraArgs);
191}
192
193inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
194{
195 ArgVector extraArgs{};
196 compute(cfg, comp, output, size, vars, extraArgs);
197}
198
199inline double reduceSum(Config cfg, InputArr input, size_t n)
200{
201 init();
202 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
203 return dispatch->reduceSum(cfg, input, n);
204}
205
206inline ReduceNLLOutput reduceNLL(Config cfg, std::span<const double> probas, std::span<const double> weights,
207 std::span<const double> offsetProbas)
208{
209 init();
210 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
211 return dispatch->reduceNLL(cfg, probas, weights, offsetProbas);
212}
213
214} // End namespace RooBatchCompute
215
216#endif
#define R__EXTERN
Definition DllImport.h:27
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute library.
The interface which should be implemented to provide optimised computation functions for implementations of RooAbsReal::computeBatch().
virtual double reduceSum(Config const &cfg, InputArr input, size_t n)=0
void compute(Config const &cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
virtual std::string architectureName() const =0
virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, const VarVector &, ArgVector &)=0
virtual Architecture architecture() const =0
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)=0
const Int_t n
Definition legend1.C:16
Namespace for dispatching RooFit computations to various backends.
R__EXTERN RooBatchComputeInterface * dispatchCUDA
std::string cpuArchitectureName()
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
std::vector< std::span< const double > > VarVector
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
Architecture cpuArchitecture()
const double *__restrict InputArr
void init()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
std::vector< double > ArgVector
void compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars, ArgVector &extraArgs)
double *__restrict RestrictArr
static void output()