Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Evaluator.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Jonas Rembser, CERN 2021
5 * Emmanouil Michalainas, CERN 2021
6 *
7 * Copyright (c) 2021, CERN
8 *
9 * Redistribution and use in source and binary forms,
10 * with or without modification, are permitted according to the terms
11 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
12 */
13
14/**
15\file Evaluator.cxx
16\class RooFit::Evaluator
17\ingroup Roofitcore
18
19Evaluates a RooAbsReal object in other ways than recursive graph
20traversal. Currently, it is being used for evaluating a RooAbsReal object and
21supplying the value to the minimizer, during a fit. The class scans the
22dependencies and schedules the computations in a secure and efficient way. The
23computations take place in the RooBatchCompute library and can be carried out
24by either the CPU or a CUDA-supporting GPU. The Evaluator class takes care
25of data transfers. An instance of this class is created every time
26RooAbsPdf::fitTo() is called and gets destroyed when the fitting ends.
27**/
28
29#include <RooFit/Evaluator.h>
30
31#include <RooAbsCategory.h>
32#include <RooAbsData.h>
33#include <RooAbsReal.h>
34#include <RooRealVar.h>
35#include <RooBatchCompute.h>
36#include <RooMsgService.h>
37#include <RooNameReg.h>
38#include <RooSimultaneous.h>
39
41#include "Detail/Buffers.h"
42#include "RooFitImplHelpers.h"
43
44#include <chrono>
45#include <iomanip>
46#include <numeric>
47#include <thread>
48
49#ifdef ROOFIT_CUDA
50
52
54
55#endif
56
57namespace RooFit {
58
59namespace {
60
61// To avoid deleted move assignment.
62template <class T>
63void assignSpan(std::span<T> &to, std::span<T> const &from)
64{
65 to = from;
66}
67
68void logArchitectureInfo(bool useGPU)
69{
70 // We have to exit early if the message stream is not active. Otherwise it's
71 // possible that this function skips logging because it thinks it has
72 // already logged, but actually it didn't.
73 if (!RooMsgService::instance().isActive(nullptr, RooFit::Fitting, RooFit::INFO)) {
74 return;
75 }
76
77 // Don't repeat logging architecture info if the useGPU option didn't change
78 {
79 // Second element of pair tracks whether this function has already been called
80 static std::pair<bool, bool> lastUseGPU;
81 if (lastUseGPU.second && lastUseGPU.first == useGPU)
82 return;
83 lastUseGPU = {useGPU, true};
84 }
85
86 auto log = [](std::string_view message) {
87 oocxcoutI(static_cast<RooAbsArg *>(nullptr), Fitting) << message << std::endl;
88 };
89
90 if (useGPU && !RooBatchCompute::hasCuda()) {
91 throw std::runtime_error(std::string("In: ") + __func__ + "(), " + __FILE__ + ":" + __LINE__ +
92 ": Cuda implementation of the computing library is not available\n");
93 }
95 log("using generic CPU library compiled with no vectorizations");
96 } else {
97 log(std::string("using CPU computation library compiled with -m") + RooBatchCompute::cpuArchitectureName());
98 }
99 if (useGPU) {
100 log("using CUDA computation library");
101 }
102}
103
104} // namespace
105
106/// A struct used by the Evaluator to store information on the RooAbsArgs in
107/// the computation graph.
108struct NodeInfo {
109
110 bool isScalar() const { return outputSize == 1; }
111
112#ifdef ROOFIT_CUDA
113 bool computeInGPU() const { return (absArg->isReducerNode() || !isScalar()) && absArg->canComputeBatchWithCuda(); }
114#endif
115
116 RooAbsArg *absArg = nullptr;
118
119 std::shared_ptr<Detail::AbsBuffer> buffer;
120 std::size_t iNode = 0;
121 int remClients = 0;
122 int remServers = 0;
123#ifdef ROOFIT_CUDA
124 bool copyAfterEvaluation = false;
125#endif
126 bool fromArrayInput = false;
127 bool isVariable = false;
128 bool isDirty = true;
129 bool isCategory = false;
130 bool hasLogged = false;
131 std::size_t outputSize = 1;
132 std::size_t lastSetValCount = std::numeric_limits<std::size_t>::max();
133 double scalarBuffer = 0.0;
134 std::vector<NodeInfo *> serverInfos;
135 std::vector<NodeInfo *> clientInfos;
136
137#ifdef ROOFIT_CUDA
138 std::unique_ptr<RooFit::Detail::CudaInterface::CudaEvent> event;
139 std::unique_ptr<RooFit::Detail::CudaInterface::CudaStream> stream;
140
141 /// Check the servers of a node that has been computed and release its
142 /// resources if they are no longer needed.
143 void decrementRemainingClients()
144 {
145 if (--remClients == 0 && !fromArrayInput) {
146 buffer.reset();
147 }
148 }
149#endif // ROOFIT_CUDA
150};
151
152/// Construct a new Evaluator. The constructor analyzes and saves metadata about the graph,
153/// useful for the evaluation of it that will be done later. In case the CUDA mode is selected,
154/// there's also some CUDA-related initialization.
155///
156/// \param[in] absReal The RooAbsReal object that sits on top of the
157/// computation graph that we want to evaluate.
158/// \param[in] useGPU Whether the evaluation should be preferably done on the GPU.
159Evaluator::Evaluator(const RooAbsReal &absReal, bool useGPU)
160 : _bufferManager{std::make_unique<Detail::BufferManager>()},
161 _topNode{const_cast<RooAbsReal &>(absReal)},
162 _useGPU{useGPU}
163{
164#ifndef ROOFIT_CUDA
165 if (useGPU) {
166 throw std::runtime_error("Can't create Evaluator in CUDA mode because ROOT was compiled without CUDA support!");
167 }
168#endif
169 // Some checks and logging of used architectures
170 logArchitectureInfo(_useGPU);
171
172 RooArgSet serverSet;
174
175 _evalContextCPU.resize(serverSet.size());
176#ifdef ROOFIT_CUDA
177 _evalContextCUDA.resize(serverSet.size());
178#endif
179
180 std::map<RooFit::Detail::DataKey, NodeInfo *> nodeInfos;
181
182 // Fill the ordered nodes list and initialize the node info structs.
183 _nodes.reserve(serverSet.size());
184 std::size_t iNode = 0;
185 for (RooAbsArg *arg : serverSet) {
186
187 _nodes.emplace_back();
188 auto &nodeInfo = _nodes.back();
189 nodeInfo.absArg = arg;
190 nodeInfo.originalOperMode = arg->operMode();
191 nodeInfo.iNode = iNode;
192 nodeInfos[arg] = &nodeInfo;
193
194 if (dynamic_cast<RooRealVar const *>(arg)) {
195 nodeInfo.isVariable = true;
196 } else {
197 arg->setDataToken(iNode);
198 }
199 if (dynamic_cast<RooAbsCategory const *>(arg)) {
200 nodeInfo.isCategory = true;
201 }
202
203 ++iNode;
204 }
205
206 for (NodeInfo &info : _nodes) {
207 info.serverInfos.reserve(info.absArg->servers().size());
208 for (RooAbsArg *server : info.absArg->servers()) {
209 if (server->isValueServer(*info.absArg)) {
210 auto *serverInfo = nodeInfos.at(server);
211 info.serverInfos.emplace_back(serverInfo);
212 serverInfo->clientInfos.emplace_back(&info);
213 }
214 }
215 }
216
218
219#ifdef ROOFIT_CUDA
220 if (_useGPU) {
221 // create events and streams for every node
222 for (auto &info : _nodes) {
223 info.event = std::make_unique<CudaInterface::CudaEvent>(false);
224 info.stream = std::make_unique<CudaInterface::CudaStream>();
226 cfg.setCudaStream(info.stream.get());
227 _evalContextCUDA.setConfig(info.absArg, cfg);
228 }
229 }
230#endif
231}
232
233/// If there are servers with the same name that got de-duplicated in the
234/// `_nodes` list, we need to set their data tokens too. We find such nodes by
235/// visiting the servers of every known node.
237{
238 for (NodeInfo &info : _nodes) {
239 std::size_t iValueServer = 0;
240 for (RooAbsArg *server : info.absArg->servers()) {
241 if (server->isValueServer(*info.absArg)) {
242 auto *knownServer = info.serverInfos[iValueServer]->absArg;
243 if (knownServer->hasDataToken()) {
244 server->setDataToken(knownServer->dataToken());
245 }
246 ++iValueServer;
247 }
248 }
249 }
250}
251
252void Evaluator::setInput(std::string const &name, std::span<const double> inputArray, bool isOnDevice)
253{
254 if (isOnDevice && !_useGPU) {
255 throw std::runtime_error("Evaluator can only take device array as input in CUDA mode!");
256 }
257
258 auto namePtr = RooNameReg::ptr(name.c_str());
259
260 // Iterate over the given data spans and add them to the data map. Check if
261 // they are used in the computation graph. If yes, add the span to the data
262 // map and set the node info accordingly.
263 std::size_t iNode = 0;
264 for (auto &info : _nodes) {
265 const bool fromArrayInput = info.absArg->namePtr() == namePtr;
266 if (fromArrayInput) {
267 info.fromArrayInput = true;
268 info.absArg->setDataToken(iNode);
269 info.outputSize = inputArray.size();
270 if (_useGPU && info.outputSize <= 1) {
271#ifdef ROOFIT_CUDA
272 // Empty or scalar observables from the data don't need to be
273 // copied to the GPU.
274 _evalContextCPU.set(info.absArg, inputArray);
275 _evalContextCUDA.set(info.absArg, inputArray);
276 } else if (_useGPU && info.outputSize > 1) {
277 // For simplicity, we put the data on both host and device for
278 // now. This could be optimized by inspecting the clients of the
279 // variable.
280 if (isOnDevice) {
281 _evalContextCUDA.set(info.absArg, inputArray);
282 auto gpuSpan = _evalContextCUDA.at(info.absArg);
283 info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size());
284 CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size());
285 _evalContextCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()});
286 } else {
287 _evalContextCPU.set(info.absArg, inputArray);
288 auto cpuSpan = _evalContextCPU.at(info.absArg);
289 info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size());
290 CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size());
291 _evalContextCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()});
292 }
293#endif
294 } else {
295 _evalContextCPU.set(info.absArg, inputArray);
296 }
297 }
298 info.isDirty = !info.fromArrayInput;
299 ++iNode;
300 }
301
303}
304
306{
307 std::map<RooFit::Detail::DataKey, std::size_t> sizeMap;
308 for (auto &info : _nodes) {
309 if (info.fromArrayInput) {
310 sizeMap[info.absArg] = info.outputSize;
311 } else {
312 // any buffer for temporary results is invalidated by resetting the output sizes
313 info.buffer.reset();
314 }
315 }
316
317 auto outputSizeMap =
318 RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) -> int {
319 auto found = sizeMap.find(key);
320 return found != sizeMap.end() ? found->second : -1;
321 });
322
323 for (auto &info : _nodes) {
324 info.outputSize = outputSizeMap.at(info.absArg);
325
326 // In principle we don't need dirty flag propagation because the driver
327 // takes care of deciding which node needs to be re-evaluated. However,
328 // disabling it also for scalar mode results in very long fitting times
329 // for specific models (test 14 in stressRooFit), which still needs to be
330 // understood. TODO.
331 if (!info.isScalar()) {
332 setOperMode(info.absArg, RooAbsArg::ADirty);
333 } else {
334 setOperMode(info.absArg, info.originalOperMode);
335 }
336 }
337
338#ifdef ROOFIT_CUDA
339 if (_useGPU) {
340 markGPUNodes();
341 }
342#endif
343
345}
346
348{
349 for (auto &info : _nodes) {
350 info.absArg->resetDataToken();
351 }
352}
353
355{
356 using namespace Detail;
357
358 auto nodeAbsReal = static_cast<RooAbsReal const *>(node);
359
360 const std::size_t nOut = info.outputSize;
361
362 double *buffer = nullptr;
363 if (nOut == 1) {
364 buffer = &info.scalarBuffer;
365#ifdef ROOFIT_CUDA
366 if (_useGPU) {
367 _evalContextCUDA.set(node, {buffer, nOut});
368 }
369#endif
370 } else {
371#ifdef ROOFIT_CUDA
372 if (!info.hasLogged && _useGPU) {
373 RooAbsArg const &arg = *info.absArg;
374 oocoutI(&arg, FastEvaluations) << "The argument " << arg.ClassName() << "::" << arg.GetName()
375 << " could not be evaluated on the GPU because the class doesn't support it. "
376 "Consider requesting or implementing it to benefit from a speed up."
377 << std::endl;
378 info.hasLogged = true;
379 }
380#endif
381 if (!info.buffer) {
382#ifdef ROOFIT_CUDA
383 info.buffer = info.copyAfterEvaluation ? _bufferManager->makePinnedBuffer(nOut, info.stream.get())
384 : _bufferManager->makeCpuBuffer(nOut);
385#else
386 info.buffer = _bufferManager->makeCpuBuffer(nOut);
387#endif
388 }
389 buffer = info.buffer->cpuWritePtr();
390 }
391 assignSpan(_evalContextCPU._currentOutput, {buffer, nOut});
392 _evalContextCPU.set(node, {buffer, nOut});
393 if (nOut > 1) {
395 }
396 nodeAbsReal->doEval(_evalContextCPU);
399#ifdef ROOFIT_CUDA
400 if (info.copyAfterEvaluation) {
401 _evalContextCUDA.set(node, {info.buffer->gpuReadPtr(), nOut});
402 if (info.event) {
403 CudaInterface::cudaEventRecord(*info.event, *info.stream);
404 }
405 }
406#endif
407}
408
409/// Process a variable in the computation graph. This is a separate non-inlined
410/// function such that we can see in performance profiles how long this takes.
412{
413 RooAbsArg *node = nodeInfo.absArg;
414 auto *var = static_cast<RooRealVar const *>(node);
415 if (nodeInfo.lastSetValCount != var->valueResetCounter()) {
416 nodeInfo.lastSetValCount = var->valueResetCounter();
417 for (NodeInfo *clientInfo : nodeInfo.clientInfos) {
418 clientInfo->isDirty = true;
419 }
420 computeCPUNode(node, nodeInfo);
421 nodeInfo.isDirty = false;
422 }
423}
424
425/// Flags all the clients of a given node dirty. This is a separate non-inlined
426/// function such that we can see in performance profiles how long this takes.
428{
429 for (NodeInfo *clientInfo : nodeInfo.clientInfos) {
430 clientInfo->isDirty = true;
431 }
432}
433
434/// Returns the value of the top node in the computation graph
435std::span<const double> Evaluator::run()
436{
439
441
442#ifdef ROOFIT_CUDA
443 if (_useGPU) {
444 return getValHeterogeneous();
445 }
446#endif
447
448 for (auto &nodeInfo : _nodes) {
449 if (!nodeInfo.fromArrayInput) {
450 if (nodeInfo.isVariable) {
451 processVariable(nodeInfo);
452 } else {
453 if (nodeInfo.isDirty) {
454 setClientsDirty(nodeInfo);
455 computeCPUNode(nodeInfo.absArg, nodeInfo);
456 nodeInfo.isDirty = false;
457 }
458 }
459 }
460 }
461
462 // return the final output
463 return _evalContextCPU.at(&_topNode);
464}
465
466/// Returns the value of the top node in the computation graph
467std::span<const double> Evaluator::getValHeterogeneous()
468{
469#ifdef ROOFIT_CUDA
470 for (auto &info : _nodes) {
471 info.remClients = info.clientInfos.size();
472 info.remServers = info.serverInfos.size();
473 if (info.buffer && !info.fromArrayInput) {
474 info.buffer.reset();
475 }
476 }
477
478 // find initial GPU nodes and assign them to GPU
479 for (auto &info : _nodes) {
480 if (info.remServers == 0 && info.computeInGPU()) {
481 assignToGPU(info);
482 }
483 }
484
485 NodeInfo const &topNodeInfo = _nodes.back();
486 while (topNodeInfo.remServers != -2) {
487 // find finished GPU nodes
488 for (auto &info : _nodes) {
489 if (info.remServers == -1 && !info.stream->isActive()) {
490 info.remServers = -2;
491 // Decrement number of remaining servers for clients and start GPU computations
492 for (auto *infoClient : info.clientInfos) {
493 --infoClient->remServers;
494 if (infoClient->computeInGPU() && infoClient->remServers == 0) {
495 assignToGPU(*infoClient);
496 }
497 }
498 for (auto *serverInfo : info.serverInfos) {
499 serverInfo->decrementRemainingClients();
500 }
501 }
502 }
503
504 // find next CPU node
505 auto it = _nodes.begin();
506 for (; it != _nodes.end(); it++) {
507 if (it->remServers == 0 && !it->computeInGPU())
508 break;
509 }
510
511 // if no CPU node available sleep for a while to save CPU usage
512 if (it == _nodes.end()) {
513 std::this_thread::sleep_for(std::chrono::milliseconds(1));
514 continue;
515 }
516
517 // compute next CPU node
518 NodeInfo &info = *it;
519 RooAbsArg const *node = info.absArg;
520 info.remServers = -2; // so that it doesn't get picked again
521
522 if (!info.fromArrayInput) {
523 computeCPUNode(node, info);
524 }
525
526 // Assign the clients that are computed on the GPU
527 for (auto *infoClient : info.clientInfos) {
528 if (--infoClient->remServers == 0 && infoClient->computeInGPU()) {
529 assignToGPU(*infoClient);
530 }
531 }
532 for (auto *serverInfo : info.serverInfos) {
533 serverInfo->decrementRemainingClients();
534 }
535 }
536
537 // return the final value
539#else
540 // Doesn't matter what we do here, because it's a private function that's
541 // not called when RooFit is not built with CUDA support.
542 return {};
543#endif // ROOFIT_CUDA
544}
545
546/// Assign a node to be computed in the GPU. Scan it's clients and also assign them
547/// in case they only depend on GPU nodes.
549{
550 using namespace Detail;
551
552 info.remServers = -1;
553
554#ifdef ROOFIT_CUDA
555 auto node = static_cast<RooAbsReal const *>(info.absArg);
556
557 // wait for every server to finish
558 for (auto *infoServer : info.serverInfos) {
559 if (infoServer->event)
560 info.stream->waitForEvent(*infoServer->event);
561 }
562
563 const std::size_t nOut = info.outputSize;
564
565 double *buffer = nullptr;
566 if (nOut == 1) {
567 buffer = &info.scalarBuffer;
568 _evalContextCPU.set(node, {buffer, nOut});
569 } else {
570 info.buffer = info.copyAfterEvaluation ? _bufferManager->makePinnedBuffer(nOut, info.stream.get())
571 : _bufferManager->makeGpuBuffer(nOut);
572 buffer = info.buffer->gpuWritePtr();
573 }
574 assignSpan(_evalContextCUDA._currentOutput, {buffer, nOut});
575 _evalContextCUDA.set(node, {buffer, nOut});
576 node->doEval(_evalContextCUDA);
577 CudaInterface::cudaEventRecord(*info.event, *info.stream);
578 if (info.copyAfterEvaluation) {
579 _evalContextCPU.set(node, {info.buffer->cpuReadPtr(), nOut});
580 }
581#endif // ROOFIT_CUDA
582}
583
584/// Decides which nodes are assigned to the GPU in a CUDA fit.
586{
587#ifdef ROOFIT_CUDA
588 for (auto &info : _nodes) {
589 info.copyAfterEvaluation = false;
590 // scalar nodes don't need copying
591 if (!info.isScalar()) {
592 for (auto *clientInfo : info.clientInfos) {
593 if (info.computeInGPU() != clientInfo->computeInGPU()) {
594 info.copyAfterEvaluation = true;
595 break;
596 }
597 }
598 }
599 }
600#endif // ROOFIT_CUDA
601}
602
603/// Temporarily change the operation mode of a RooAbsArg until the
604/// Evaluator gets deleted.
606{
607 if (opMode != arg->operMode()) {
608 _changeOperModeRAIIs.emplace(std::make_unique<ChangeOperModeRAII>(arg, opMode));
609 }
610}
611
612void Evaluator::print(std::ostream &os)
613{
614 std::cout << "--- RooFit BatchMode evaluation ---\n";
615
616 std::vector<int> widths{9, 37, 20, 9, 10, 20};
617
618 auto printElement = [&](int iCol, auto const &t) {
619 const char separator = ' ';
620 os << separator << std::left << std::setw(widths[iCol]) << std::setfill(separator) << t;
621 os << "|";
622 };
623
624 auto printHorizontalRow = [&]() {
625 int n = 0;
626 for (int w : widths) {
627 n += w + 2;
628 }
629 for (int i = 0; i < n; i++) {
630 os << '-';
631 }
632 os << "|\n";
633 };
634
635 printHorizontalRow();
636
637 os << "|";
638 printElement(0, "Index");
639 printElement(1, "Name");
640 printElement(2, "Class");
641 printElement(3, "Size");
642 printElement(4, "From Data");
643 printElement(5, "1st value");
644 std::cout << "\n";
645
646 printHorizontalRow();
647
648 for (std::size_t iNode = 0; iNode < _nodes.size(); ++iNode) {
649 auto &nodeInfo = _nodes[iNode];
650 RooAbsArg *node = nodeInfo.absArg;
651
652 auto span = _evalContextCPU.at(node);
653
654 os << "|";
655 printElement(0, iNode);
656 printElement(1, node->GetName());
657 printElement(2, node->ClassName());
658 printElement(3, nodeInfo.outputSize);
659 printElement(4, nodeInfo.fromArrayInput);
660 printElement(5, span[0]);
661
662 std::cout << "\n";
663 }
664
665 printHorizontalRow();
666}
667
668/// Gets all the parameters of the RooAbsReal. This is in principle not
669/// necessary, because we can always ask the RooAbsReal itself, but the
670/// Evaluator has the cached information to get the answer quicker.
671/// Therefore, this is not meant to be used in general, just where it matters.
672/// \warning If we find another solution to get the parameters efficiently,
673/// this function might be removed without notice.
675{
676 RooArgSet parameters;
677 for (auto &nodeInfo : _nodes) {
678 if (nodeInfo.isVariable) {
679 parameters.add(*nodeInfo.absArg);
680 }
681 }
682 // Just like in RooAbsArg::getParameters(), we sort the parameters alphabetically.
683 parameters.sort();
684 return parameters;
685}
686
687/// \brief Sets the offset mode for evaluation.
688///
689/// This function sets the offset mode for evaluation to the specified mode.
690/// It updates the offset mode for both CPU and CUDA evaluation contexts.
691///
692/// \param mode The offset mode to be set.
693///
694/// \note This function marks reducer nodes as dirty if the offset mode is
695/// changed, because only reducer nodes can use offsetting.
697{
699 return;
700
703
704 for (auto &nodeInfo : _nodes) {
705 if (nodeInfo.absArg->isReducerNode()) {
706 nodeInfo.isDirty = true;
707 }
708 }
709}
710
711} // namespace RooFit
#define oocoutI(o, a)
#define oocxcoutI(o, a)
Option_t Option_t TPoint TPoint const char mode
char name[80]
Definition TGX11.cxx:110
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:77
virtual bool canComputeBatchWithCuda() const
Definition RooAbsArg.h:574
virtual bool isReducerNode() const
Definition RooAbsArg.h:575
TIterator Use end() or range-based loops.")
Definition RooAbsArg.h:136
OperMode operMode() const
Query the operation mode of this node.
Definition RooAbsArg.h:482
A space to attach TBranches.
virtual bool add(const RooAbsArg &var, bool silent=false)
Add the specified argument to list.
Storage_t::size_type size() const
void sort(bool reverse=false)
Sort collection using std::sort and name comparison.
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
void set(RooAbsArg const *arg, std::span< const double > const &span)
Definition EvalContext.h:91
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
void enableVectorBuffers(bool enable)
OffsetMode _offsetMode
void setConfig(RooAbsArg const *arg, RooBatchCompute::Config const &config)
std::span< double > _currentOutput
void resize(std::size_t n)
void print(std::ostream &os)
void setClientsDirty(NodeInfo &nodeInfo)
Flags all the clients of a given node dirty.
RooArgSet getParameters() const
Gets all the parameters of the RooAbsReal.
void setOffsetMode(RooFit::EvalContext::OffsetMode)
Sets the offset mode for evaluation.
void syncDataTokens()
If there are servers with the same name that got de-duplicated in the _nodes list,...
const bool _useGPU
Definition Evaluator.h:65
std::vector< NodeInfo > _nodes
Definition Evaluator.h:70
bool _needToUpdateOutputSizes
Definition Evaluator.h:67
std::span< const double > getValHeterogeneous()
Returns the value of the top node in the computation graph.
std::span< const double > run()
Returns the value of the top node in the computation graph.
Evaluator(const RooAbsReal &absReal, bool useGPU=false)
Construct a new Evaluator.
void processVariable(NodeInfo &nodeInfo)
Process a variable in the computation graph.
void markGPUNodes()
Decides which nodes are assigned to the GPU in a CUDA fit.
void assignToGPU(NodeInfo &info)
Assign a node to be computed in the GPU.
void setInput(std::string const &name, std::span< const double > inputArray, bool isOnDevice)
RooFit::EvalContext _evalContextCUDA
Definition Evaluator.h:69
RooFit::EvalContext _evalContextCPU
Definition Evaluator.h:68
std::unique_ptr< Detail::BufferManager > _bufferManager
Definition Evaluator.h:63
void computeCPUNode(const RooAbsArg *node, NodeInfo &info)
std::stack< std::unique_ptr< ChangeOperModeRAII > > _changeOperModeRAIIs
Definition Evaluator.h:71
void setOperMode(RooAbsArg *arg, RooAbsArg::OperMode opMode)
Temporarily change the operation mode of a RooAbsArg until the Evaluator gets deleted.
RooAbsReal & _topNode
Definition Evaluator.h:64
static RooMsgService & instance()
Return reference to singleton instance.
static const TNamed * ptr(const char *stringPtr)
Return a unique TNamed pointer for given C++ string.
Variable that can be changed from the outside.
Definition RooRealVar.h:37
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
virtual const char * ClassName() const
Returns name of class to which the object belongs.
Definition TObject.cxx:207
RVec< PromoteType< T > > log(const RVec< T > &v)
Definition RVec.hxx:1841
const Int_t n
Definition legend1.C:16
std::string cpuArchitectureName()
Architecture cpuArchitecture()
void cudaEventRecord(CudaEvent &, CudaStream &)
Records a CUDA event.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
@ FastEvaluations
void getSortedComputationGraph(RooAbsArg const &func, RooArgSet &out)
A struct used by the Evaluator to store information on the RooAbsArgs in the computation graph.
RooAbsArg * absArg
bool isScalar() const
std::size_t iNode
std::size_t lastSetValCount
std::vector< NodeInfo * > serverInfos
std::shared_ptr< Detail::AbsBuffer > buffer
RooAbsArg::OperMode originalOperMode
std::size_t outputSize
std::vector< NodeInfo * > clientInfos