Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Evaluator.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Jonas Rembser, CERN 2021
5 * Emmanouil Michalainas, CERN 2021
6 *
7 * Copyright (c) 2021, CERN
8 *
9 * Redistribution and use in source and binary forms,
10 * with or without modification, are permitted according to the terms
11 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
12 */
13
14/**
15\file Evaluator.cxx
16\class Evaluator
17\ingroup Roofitcore
18
19Evaluates a RooAbsReal object in other ways than recursive graph
20traversal. Currently, it is being used for evaluating a RooAbsReal object and
21supplying the value to the minimizer, during a fit. The class scans the
22dependencies and schedules the computations in a secure and efficient way. The
23computations take place in the RooBatchCompute library and can be carried off
24by either the CPU or a CUDA-supporting GPU. The Evaluator class takes care
25of data transfers. An instance of this class is created every time
26RooAbsPdf::fitTo() is called and gets destroyed when the fitting ends.
27**/
28
#include <RooFit/Evaluator.h>

#include <RooAbsCategory.h>
#include <RooAbsData.h>
#include <RooAbsReal.h>
#include <RooBatchCompute.h>
#include <RooMsgService.h>
#include <RooNameReg.h>
#include <RooRealVar.h>
#include <RooSimultaneous.h>

#include "BatchModeHelpers.h"
#include "Detail/Buffers.h"
#include "RooFitImplHelpers.h"

#include <chrono>
#include <iomanip>
#include <limits>
#include <map>
#include <numeric>
#include <stdexcept>
#include <thread>

#ifdef ROOFIT_CUDA

#include <RooFit/Detail/CudaInterface.h>

// NOTE(review): alias restored -- the code below refers to CudaInterface::
// unqualified, which requires this namespace alias. Verify against upstream.
namespace CudaInterface = RooFit::Detail::CudaInterface;

#endif
57
58namespace RooFit {
59
60namespace {
61
62void logArchitectureInfo(bool useGPU)
63{
64 // We have to exit early if the message stream is not active. Otherwise it's
65 // possible that this function skips logging because it thinks it has
66 // already logged, but actually it didn't.
67 if (!RooMsgService::instance().isActive(nullptr, RooFit::Fitting, RooFit::INFO)) {
68 return;
69 }
70
71 // Don't repeat logging architecture info if the useGPU option didn't change
72 {
73 // Second element of pair tracks whether this function has already been called
74 static std::pair<bool, bool> lastUseGPU;
75 if (lastUseGPU.second && lastUseGPU.first == useGPU)
76 return;
77 lastUseGPU = {useGPU, true};
78 }
79
80 auto log = [](std::string_view message) {
81 oocxcoutI(static_cast<RooAbsArg *>(nullptr), Fitting) << message << std::endl;
82 };
83
84 if (useGPU && !RooBatchCompute::hasCuda()) {
85 throw std::runtime_error(std::string("In: ") + __func__ + "(), " + __FILE__ + ":" + __LINE__ +
86 ": Cuda implementation of the computing library is not available\n");
87 }
89 log("using generic CPU library compiled with no vectorizations");
90 } else {
91 log(std::string("using CPU computation library compiled with -m") + RooBatchCompute::cpuArchitectureName());
92 }
93 if (useGPU) {
94 log("using CUDA computation library");
95 }
96}
97
98} // namespace
99
100/// A struct used by the Evaluator to store information on the RooAbsArgs in
101/// the computation graph.
102struct NodeInfo {
103
104 bool isScalar() const { return outputSize == 1; }
105
106#ifdef ROOFIT_CUDA
107 bool computeInGPU() const { return (absArg->isReducerNode() || !isScalar()) && absArg->canComputeBatchWithCuda(); }
108#endif
109
110 RooAbsArg *absArg = nullptr;
112
113 std::shared_ptr<Detail::AbsBuffer> buffer;
114 std::size_t iNode = 0;
115 int remClients = 0;
116 int remServers = 0;
117#ifdef ROOFIT_CUDA
118 bool copyAfterEvaluation = false;
119#endif
120 bool fromArrayInput = false;
121 bool isVariable = false;
122 bool isDirty = true;
123 bool isCategory = false;
124 bool hasLogged = false;
125 std::size_t outputSize = 1;
126 std::size_t lastSetValCount = std::numeric_limits<std::size_t>::max();
127 double scalarBuffer = 0.0;
128 std::vector<NodeInfo *> serverInfos;
129 std::vector<NodeInfo *> clientInfos;
130
131#ifdef ROOFIT_CUDA
132 std::unique_ptr<RooFit::Detail::CudaInterface::CudaEvent> event;
133 std::unique_ptr<RooFit::Detail::CudaInterface::CudaStream> stream;
134
135 /// Check the servers of a node that has been computed and release it's resources
136 /// if they are no longer needed.
137 void decrementRemainingClients()
138 {
139 if (--remClients == 0 && !fromArrayInput) {
140 buffer.reset();
141 }
142 }
143#endif // ROOFIT_CUDA
144};
145
146/// Construct a new Evaluator. The constructor analyzes and saves metadata about the graph,
147/// useful for the evaluation of it that will be done later. In case the CUDA mode is selected,
148/// there's also some CUDA-related initialization.
149///
150/// \param[in] absReal The RooAbsReal object that sits on top of the
151/// computation graph that we want to evaluate.
152/// \param[in] useGPU Whether the evaluation should be preferably done on the GPU.
153Evaluator::Evaluator(const RooAbsReal &absReal, bool useGPU)
154 : _bufferManager{std::make_unique<Detail::BufferManager>()},
155 _topNode{const_cast<RooAbsReal &>(absReal)},
156 _useGPU{useGPU}
157{
158#ifndef ROOFIT_CUDA
159 if (useGPU) {
160 throw std::runtime_error("Can't create Evaluator in CUDA mode because ROOT was compiled without CUDA support!");
161 }
162#endif
163 // Some checks and logging of used architectures
164 logArchitectureInfo(_useGPU);
165
166 RooArgSet serverSet;
168
169 _dataMapCPU.resize(serverSet.size());
170#ifdef ROOFIT_CUDA
171 _dataMapCUDA.resize(serverSet.size());
172#endif
173
174 std::map<RooFit::Detail::DataKey, NodeInfo *> nodeInfos;
175
176 // Fill the ordered nodes list and initialize the node info structs.
177 _nodes.reserve(serverSet.size());
178 std::size_t iNode = 0;
179 for (RooAbsArg *arg : serverSet) {
180
181 _nodes.emplace_back();
182 auto &nodeInfo = _nodes.back();
183 nodeInfo.absArg = arg;
184 nodeInfo.originalOperMode = arg->operMode();
185 nodeInfo.iNode = iNode;
186 nodeInfos[arg] = &nodeInfo;
187
188 if (dynamic_cast<RooRealVar const *>(arg)) {
189 nodeInfo.isVariable = true;
190 } else {
191 arg->setDataToken(iNode);
192 }
193 if (dynamic_cast<RooAbsCategory const *>(arg)) {
194 nodeInfo.isCategory = true;
195 }
196
197 ++iNode;
198 }
199
200 for (NodeInfo &info : _nodes) {
201 info.serverInfos.reserve(info.absArg->servers().size());
202 for (RooAbsArg *server : info.absArg->servers()) {
203 if (server->isValueServer(*info.absArg)) {
204 auto *serverInfo = nodeInfos.at(server);
205 info.serverInfos.emplace_back(serverInfo);
206 serverInfo->clientInfos.emplace_back(&info);
207 }
208 }
209 }
210
212
213#ifdef ROOFIT_CUDA
214 if (_useGPU) {
215 // create events and streams for every node
216 for (auto &info : _nodes) {
217 info.event = std::make_unique<CudaInterface::CudaEvent>(false);
218 info.stream = std::make_unique<CudaInterface::CudaStream>();
220 cfg.setCudaStream(info.stream.get());
221 _dataMapCUDA.setConfig(info.absArg, cfg);
222 }
223 }
224#endif
225}
226
227/// If there are servers with the same name that got de-duplicated in the
228/// `_nodes` list, we need to set their data tokens too. We find such nodes by
229/// visiting the servers of every known node.
231{
232 for (NodeInfo &info : _nodes) {
233 std::size_t iValueServer = 0;
234 for (RooAbsArg *server : info.absArg->servers()) {
235 if (server->isValueServer(*info.absArg)) {
236 auto *knownServer = info.serverInfos[iValueServer]->absArg;
237 if (knownServer->hasDataToken()) {
238 server->setDataToken(knownServer->dataToken());
239 }
240 ++iValueServer;
241 }
242 }
243 }
244}
245
246void Evaluator::setInput(std::string const &name, std::span<const double> inputArray, bool isOnDevice)
247{
248 if (isOnDevice && !_useGPU) {
249 throw std::runtime_error("Evaluator can only take device array as input in CUDA mode!");
250 }
251
252 auto namePtr = RooNameReg::ptr(name.c_str());
253
254 // Iterate over the given data spans and add them to the data map. Check if
255 // they are used in the computation graph. If yes, add the span to the data
256 // map and set the node info accordingly.
257 std::size_t iNode = 0;
258 for (auto &info : _nodes) {
259 const bool fromArrayInput = info.absArg->namePtr() == namePtr;
260 if (fromArrayInput) {
261 info.fromArrayInput = true;
262 info.absArg->setDataToken(iNode);
263 info.outputSize = inputArray.size();
264 if (_useGPU) {
265#ifdef ROOFIT_CUDA
266 if (info.outputSize == 1) {
267 // Scalar observables from the data don't need to be copied to the GPU
268 _dataMapCPU.set(info.absArg, inputArray);
269 _dataMapCUDA.set(info.absArg, inputArray);
270 } else {
271 if (_useGPU) {
272 // For simplicity, we put the data on both host and device for
273 // now. This could be optimized by inspecting the clients of the
274 // variable.
275 if (isOnDevice) {
276 _dataMapCUDA.set(info.absArg, inputArray);
277 auto gpuSpan = _dataMapCUDA.at(info.absArg);
278 info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size());
279 CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size());
280 _dataMapCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()});
281 } else {
282 _dataMapCPU.set(info.absArg, inputArray);
283 auto cpuSpan = _dataMapCPU.at(info.absArg);
284 info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size());
285 CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size());
286 _dataMapCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()});
287 }
288 } else {
289 _dataMapCPU.set(info.absArg, inputArray);
290 }
291 }
292#endif
293 } else {
294 _dataMapCPU.set(info.absArg, inputArray);
295 }
296 }
297 info.isDirty = !info.fromArrayInput;
298 ++iNode;
299 }
300
302}
303
305{
306 std::map<RooFit::Detail::DataKey, std::size_t> sizeMap;
307 for (auto &info : _nodes) {
308 if (info.fromArrayInput) {
309 sizeMap[info.absArg] = info.outputSize;
310 } else {
311 // any buffer for temporary results is invalidated by resetting the output sizes
312 info.buffer.reset();
313 }
314 }
315
316 auto outputSizeMap = RooFit::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) {
317 auto found = sizeMap.find(key);
318 return found != sizeMap.end() ? found->second : 0;
319 });
320
321 for (auto &info : _nodes) {
322 info.outputSize = outputSizeMap.at(info.absArg);
323
324 // In principle we don't need dirty flag propagation because the driver
325 // takes care of deciding which node needs to be re-evaluated. However,
326 // disabling it also for scalar mode results in very long fitting times
327 // for specific models (test 14 in stressRooFit), which still needs to be
328 // understood. TODO.
329 if (!info.isScalar()) {
330 setOperMode(info.absArg, RooAbsArg::ADirty);
331 } else {
332 setOperMode(info.absArg, info.originalOperMode);
333 }
334 }
335
336#ifdef ROOFIT_CUDA
337 if (_useGPU) {
338 markGPUNodes();
339 }
340#endif
341
343}
344
346{
347 for (auto &info : _nodes) {
348 info.absArg->resetDataToken();
349 }
350}
351
353{
354 using namespace Detail;
355
356 auto nodeAbsReal = static_cast<RooAbsReal const *>(node);
357
358 const std::size_t nOut = info.outputSize;
359
360 double *buffer = nullptr;
361 if (nOut == 1) {
362 buffer = &info.scalarBuffer;
363#ifdef ROOFIT_CUDA
364 if (_useGPU) {
365 _dataMapCUDA.set(node, {buffer, nOut});
366 }
367#endif
368 } else {
369#ifdef ROOFIT_CUDA
370 if (!info.hasLogged && _useGPU) {
371 RooAbsArg const &arg = *info.absArg;
372 oocoutI(&arg, FastEvaluations) << "The argument " << arg.ClassName() << "::" << arg.GetName()
373 << " could not be evaluated on the GPU because the class doesn't support it. "
374 "Consider requesting or implementing it to benefit from a speed up."
375 << std::endl;
376 info.hasLogged = true;
377 }
378#endif
379 if (!info.buffer) {
380#ifdef ROOFIT_CUDA
381 info.buffer = info.copyAfterEvaluation ? _bufferManager->makePinnedBuffer(nOut, info.stream.get())
382 : _bufferManager->makeCpuBuffer(nOut);
383#else
384 info.buffer = _bufferManager->makeCpuBuffer(nOut);
385#endif
386 }
387 buffer = info.buffer->cpuWritePtr();
388 }
389 _dataMapCPU.set(node, {buffer, nOut});
390 nodeAbsReal->computeBatch(buffer, nOut, _dataMapCPU);
391#ifdef ROOFIT_CUDA
392 if (info.copyAfterEvaluation) {
393 _dataMapCUDA.set(node, {info.buffer->gpuReadPtr(), nOut});
394 if (info.event) {
395 CudaInterface::cudaEventRecord(*info.event, *info.stream);
396 }
397 }
398#endif
399}
400
401/// Process a variable in the computation graph. This is a separate non-inlined
402/// function such that we can see in performance profiles how long this takes.
404{
405 RooAbsArg *node = nodeInfo.absArg;
406 auto *var = static_cast<RooRealVar const *>(node);
407 if (nodeInfo.lastSetValCount != var->valueResetCounter()) {
408 nodeInfo.lastSetValCount = var->valueResetCounter();
409 for (NodeInfo *clientInfo : nodeInfo.clientInfos) {
410 clientInfo->isDirty = true;
411 }
412 computeCPUNode(node, nodeInfo);
413 nodeInfo.isDirty = false;
414 }
415}
416
417/// Flags all the clients of a given node dirty. This is a separate non-inlined
418/// function such that we can see in performance profiles how long this takes.
420{
421 for (NodeInfo *clientInfo : nodeInfo.clientInfos) {
422 clientInfo->isDirty = true;
423 }
424}
425
426/// Returns the value of the top node in the computation graph
427std::span<const double> Evaluator::run()
428{
431
433
434#ifdef ROOFIT_CUDA
435 if (_useGPU) {
436 return getValHeterogeneous();
437 }
438#endif
439
440 for (auto &nodeInfo : _nodes) {
441 if (!nodeInfo.fromArrayInput) {
442 if (nodeInfo.isVariable) {
443 processVariable(nodeInfo);
444 } else {
445 if (nodeInfo.isDirty) {
446 setClientsDirty(nodeInfo);
447 computeCPUNode(nodeInfo.absArg, nodeInfo);
448 nodeInfo.isDirty = false;
449 }
450 }
451 }
452 }
453
454 // return the final output
455 return _dataMapCPU.at(&_topNode);
456}
457
/// Returns the value of the top node in the computation graph
///
/// Heterogeneous CPU/GPU scheduling loop for CUDA fits: nodes whose servers
/// are all done are dispatched to the GPU (if eligible) or computed on the
/// CPU, until the top node has been evaluated. The remServers counter doubles
/// as a state flag: >0 waiting, 0 ready, -1 running on GPU, -2 done.
std::span<const double> Evaluator::getValHeterogeneous()
{
#ifdef ROOFIT_CUDA
   // Reset per-run scheduling state and drop stale temporary buffers.
   for (auto &info : _nodes) {
      info.remClients = info.clientInfos.size();
      info.remServers = info.serverInfos.size();
      if (info.buffer && !info.fromArrayInput) {
         info.buffer.reset();
      }
   }

   // find initial GPU nodes and assign them to GPU
   for (auto &info : _nodes) {
      if (info.remServers == 0 && info.computeInGPU()) {
         assignToGPU(info);
      }
   }

   // The node list is topologically ordered, so the last entry is the top node.
   NodeInfo const &topNodeInfo = _nodes.back();
   while (topNodeInfo.remServers != -2) {
      // find finished GPU nodes
      for (auto &info : _nodes) {
         if (info.remServers == -1 && !info.stream->isActive()) {
            info.remServers = -2;
            // Decrement number of remaining servers for clients and start GPU computations
            for (auto *infoClient : info.clientInfos) {
               --infoClient->remServers;
               if (infoClient->computeInGPU() && infoClient->remServers == 0) {
                  assignToGPU(*infoClient);
               }
            }
            // Servers may now release their buffers if no client needs them anymore.
            for (auto *serverInfo : info.serverInfos) {
               serverInfo->decrementRemainingClients();
            }
         }
      }

      // find next CPU node
      auto it = _nodes.begin();
      for (; it != _nodes.end(); it++) {
         if (it->remServers == 0 && !it->computeInGPU())
            break;
      }

      // if no CPU node available sleep for a while to save CPU usage
      if (it == _nodes.end()) {
         std::this_thread::sleep_for(std::chrono::milliseconds(1));
         continue;
      }

      // compute next CPU node
      NodeInfo &info = *it;
      RooAbsArg const *node = info.absArg;
      info.remServers = -2; // so that it doesn't get picked again

      if (!info.fromArrayInput) {
         computeCPUNode(node, info);
      }

      // Assign the clients that are computed on the GPU
      for (auto *infoClient : info.clientInfos) {
         if (--infoClient->remServers == 0 && infoClient->computeInGPU()) {
            assignToGPU(*infoClient);
         }
      }
      for (auto *serverInfo : info.serverInfos) {
         serverInfo->decrementRemainingClients();
      }
   }

   // return the final value
   return _dataMapCUDA.at(&_topNode);
#else
   // Doesn't matter what we do here, because it's a private function that's
   // not called when RooFit is not built with CUDA support.
   return {};
#endif // ROOFIT_CUDA
}
537
538/// Assign a node to be computed in the GPU. Scan it's clients and also assign them
539/// in case they only depend on GPU nodes.
541{
542 using namespace Detail;
543
544 info.remServers = -1;
545
546#ifdef ROOFIT_CUDA
547 auto node = static_cast<RooAbsReal const *>(info.absArg);
548
549 // wait for every server to finish
550 for (auto *infoServer : info.serverInfos) {
551 if (infoServer->event)
552 info.stream->waitForEvent(*infoServer->event);
553 }
554
555 const std::size_t nOut = info.outputSize;
556
557 double *buffer = nullptr;
558 if (nOut == 1) {
559 buffer = &info.scalarBuffer;
560 _dataMapCPU.set(node, {buffer, nOut});
561 } else {
562 info.buffer = info.copyAfterEvaluation ? _bufferManager->makePinnedBuffer(nOut, info.stream.get())
563 : _bufferManager->makeGpuBuffer(nOut);
564 buffer = info.buffer->gpuWritePtr();
565 }
566 _dataMapCUDA.set(node, {buffer, nOut});
567 node->computeBatch(buffer, nOut, _dataMapCUDA);
568 CudaInterface::cudaEventRecord(*info.event, *info.stream);
569 if (info.copyAfterEvaluation) {
570 _dataMapCPU.set(node, {info.buffer->cpuReadPtr(), nOut});
571 }
572#endif // ROOFIT_CUDA
573}
574
575/// Decides which nodes are assigned to the GPU in a CUDA fit.
577{
578#ifdef ROOFIT_CUDA
579 for (auto &info : _nodes) {
580 info.copyAfterEvaluation = false;
581 // scalar nodes don't need copying
582 if (!info.isScalar()) {
583 for (auto *clientInfo : info.clientInfos) {
584 if (info.computeInGPU() != clientInfo->computeInGPU()) {
585 info.copyAfterEvaluation = true;
586 break;
587 }
588 }
589 }
590 }
591#endif // ROOFIT_CUDA
592}
593
594/// Temporarily change the operation mode of a RooAbsArg until the
595/// Evaluator gets deleted.
597{
598 if (opMode != arg->operMode()) {
599 _changeOperModeRAIIs.emplace(std::make_unique<ChangeOperModeRAII>(arg, opMode));
600 }
601}
602
603void Evaluator::print(std::ostream &os) const
604{
605 std::cout << "--- RooFit BatchMode evaluation ---\n";
606
607 std::vector<int> widths{9, 37, 20, 9, 10, 20};
608
609 auto printElement = [&](int iCol, auto const &t) {
610 const char separator = ' ';
611 os << separator << std::left << std::setw(widths[iCol]) << std::setfill(separator) << t;
612 os << "|";
613 };
614
615 auto printHorizontalRow = [&]() {
616 int n = 0;
617 for (int w : widths) {
618 n += w + 2;
619 }
620 for (int i = 0; i < n; i++) {
621 os << '-';
622 }
623 os << "|\n";
624 };
625
626 printHorizontalRow();
627
628 os << "|";
629 printElement(0, "Index");
630 printElement(1, "Name");
631 printElement(2, "Class");
632 printElement(3, "Size");
633 printElement(4, "From Data");
634 printElement(5, "1st value");
635 std::cout << "\n";
636
637 printHorizontalRow();
638
639 for (std::size_t iNode = 0; iNode < _nodes.size(); ++iNode) {
640 auto &nodeInfo = _nodes[iNode];
641 RooAbsArg *node = nodeInfo.absArg;
642
643 auto span = _dataMapCPU.at(node);
644
645 os << "|";
646 printElement(0, iNode);
647 printElement(1, node->GetName());
648 printElement(2, node->ClassName());
649 printElement(3, nodeInfo.outputSize);
650 printElement(4, nodeInfo.fromArrayInput);
651 printElement(5, span[0]);
652
653 std::cout << "\n";
654 }
655
656 printHorizontalRow();
657}
658
659/// Gets all the parameters of the RooAbsReal. This is in principle not
660/// necessary, because we can always ask the RooAbsReal itself, but the
661/// Evaluator has the cached information to get the answer quicker.
662/// Therefore, this is not meant to be used in general, just where it matters.
663/// \warning If we find another solution to get the parameters efficiently,
664/// this function might be removed without notice.
666{
667 RooArgSet parameters;
668 for (auto &nodeInfo : _nodes) {
669 if (!nodeInfo.fromArrayInput && nodeInfo.isVariable) {
670 parameters.add(*nodeInfo.absArg);
671 }
672 }
673 // Just like in RooAbsArg::getParameters(), we sort the parameters alphabetically.
674 parameters.sort();
675 return parameters;
676}
677
678} // namespace RooFit
#define oocoutI(o, a)
#define oocxcoutI(o, a)
char name[80]
Definition TGX11.cxx:110
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:79
virtual bool canComputeBatchWithCuda() const
Definition RooAbsArg.h:576
virtual bool isReducerNode() const
Definition RooAbsArg.h:577
OperMode operMode() const
Query the operation mode of this node.
Definition RooAbsArg.h:484
A space to attach TBranches.
virtual bool add(const RooAbsArg &var, bool silent=false)
Add the specified argument to list.
Storage_t::size_type size() const
void sort(bool reverse=false)
Sort collection using std::sort and name comparison.
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
Definition DataMap.cxx:22
void set(RooAbsArg const *arg, std::span< const double > const &span)
Definition DataMap.h:91
void setConfig(RooAbsArg const *arg, RooBatchCompute::Config const &config)
Definition DataMap.cxx:32
void resize(std::size_t n)
Definition DataMap.cxx:49
void computeCPUNode(const RooAbsArg *node, NodeInfo &info)
void setOperMode(RooAbsArg *arg, RooAbsArg::OperMode opMode)
Temporarily change the operation mode of a RooAbsArg until the Evaluator gets deleted.
RooFit::Detail::DataMap _dataMapCPU
Definition Evaluator.h:66
std::span< const double > run()
Returns the value of the top node in the computation graph.
void markGPUNodes()
Decides which nodes are assigned to the GPU in a CUDA fit.
const bool _useGPU
Definition Evaluator.h:63
std::vector< NodeInfo > _nodes
Definition Evaluator.h:68
bool _needToUpdateOutputSizes
Definition Evaluator.h:65
void print(std::ostream &os) const
std::span< const double > getValHeterogeneous()
Returns the value of the top node in the computation graph.
RooFit::Detail::DataMap _dataMapCUDA
Definition Evaluator.h:67
RooArgSet getParameters() const
Gets all the parameters of the RooAbsReal.
void setInput(std::string const &name, std::span< const double > inputArray, bool isOnDevice)
void assignToGPU(NodeInfo &info)
Assign a node to be computed in the GPU.
void syncDataTokens()
If there are servers with the same name that got de-duplicated in the _nodes list,...
void processVariable(NodeInfo &nodeInfo)
Process a variable in the computation graph.
std::unique_ptr< Detail::BufferManager > _bufferManager
Definition Evaluator.h:61
std::stack< std::unique_ptr< ChangeOperModeRAII > > _changeOperModeRAIIs
Definition Evaluator.h:69
RooAbsReal & _topNode
Definition Evaluator.h:62
Evaluator(const RooAbsReal &absReal, bool useGPU=false)
Construct a new Evaluator.
void setClientsDirty(NodeInfo &nodeInfo)
Flags all the clients of a given node dirty.
static RooMsgService & instance()
Return reference to singleton instance.
static const TNamed * ptr(const char *stringPtr)
Return a unique TNamed pointer for given C++ string.
RooRealVar represents a variable that can be changed from the outside.
Definition RooRealVar.h:37
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
virtual const char * ClassName() const
Returns name of class to which the object belongs.
Definition TObject.cxx:207
const Int_t n
Definition legend1.C:16
std::string cpuArchitectureName()
Architecture cpuArchitecture()
void cudaEventRecord(CudaEvent &, CudaStream &)
Records a CUDA event.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
@ FastEvaluations
void getSortedComputationGraph(RooAbsArg const &func, RooArgSet &out)
A struct used by the Evaluator to store information on the RooAbsArgs in the computation graph.
RooAbsArg * absArg
bool isScalar() const
std::size_t iNode
std::size_t lastSetValCount
std::vector< NodeInfo * > serverInfos
std::shared_ptr< Detail::AbsBuffer > buffer
RooAbsArg::OperMode originalOperMode
std::size_t outputSize
std::vector< NodeInfo * > clientInfos