Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Evaluator.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Jonas Rembser, CERN 2021
5 * Emmanouil Michalainas, CERN 2021
6 *
7 * Copyright (c) 2021, CERN
8 *
9 * Redistribution and use in source and binary forms,
10 * with or without modification, are permitted according to the terms
11 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
12 */
13
14/**
15\file Evaluator.cxx
16\class Evaluator
17\ingroup Roofitcore
18
19Evaluates a RooAbsReal object in other ways than recursive graph
20traversal. Currently, it is being used for evaluating a RooAbsReal object and
21supplying the value to the minimizer, during a fit. The class scans the
22dependencies and schedules the computations in a secure and efficient way. The
23computations take place in the RooBatchCompute library and can be carried off
24by either the CPU or a CUDA-supporting GPU. The Evaluator class takes care
25of data transfers. An instance of this class is created every time
26RooAbsPdf::fitTo() is called and gets destroyed when the fitting ends.
27**/
28
#include <RooFit/Evaluator.h>

#include <RooAbsCategory.h>
#include <RooAbsData.h>
#include <RooAbsReal.h>
#include <RooBatchCompute.h>
#include <RooMsgService.h>
#include <RooNameReg.h>
#include <RooRealVar.h>
#include <RooSimultaneous.h>

#include "BatchModeHelpers.h"
#include "Detail/Buffers.h"
#include "RooFitImplHelpers.h"

#include <chrono>
#include <iomanip>
#include <limits>
#include <map>
#include <numeric>
#include <stdexcept>
#include <thread>

#ifdef ROOFIT_CUDA

#include <RooFit/Detail/CudaInterface.h>

// NOTE(review): alias restored -- the code below refers to CudaInterface::
// unqualified, which requires this namespace alias. Verify against upstream.
namespace CudaInterface = RooFit::Detail::CudaInterface;

#endif
57
58namespace RooFit {
59
60namespace {
61
62void logArchitectureInfo(bool useGPU)
63{
64 // We have to exit early if the message stream is not active. Otherwise it's
65 // possible that this function skips logging because it thinks it has
66 // already logged, but actually it didn't.
67 if (!RooMsgService::instance().isActive(nullptr, RooFit::Fitting, RooFit::INFO)) {
68 return;
69 }
70
71 // Don't repeat logging architecture info if the useGPU option didn't change
72 {
73 // Second element of pair tracks whether this function has already been called
74 static std::pair<bool, bool> lastUseGPU;
75 if (lastUseGPU.second && lastUseGPU.first == useGPU)
76 return;
77 lastUseGPU = {useGPU, true};
78 }
79
80 auto log = [](std::string_view message) {
81 oocxcoutI(static_cast<RooAbsArg *>(nullptr), Fitting) << message << std::endl;
82 };
83
84 if (useGPU && !RooBatchCompute::hasCuda()) {
85 throw std::runtime_error(std::string("In: ") + __func__ + "(), " + __FILE__ + ":" + __LINE__ +
86 ": Cuda implementation of the computing library is not available\n");
87 }
89 log("using generic CPU library compiled with no vectorizations");
90 } else {
91 log(std::string("using CPU computation library compiled with -m") + RooBatchCompute::cpuArchitectureName());
92 }
93 if (useGPU) {
94 log("using CUDA computation library");
95 }
96}
97
98} // namespace
99
100/// A struct used by the Evaluator to store information on the RooAbsArgs in
101/// the computation graph.
102struct NodeInfo {
103
104 bool isScalar() const { return outputSize == 1; }
105
106#ifdef ROOFIT_CUDA
107 bool computeInGPU() const { return (absArg->isReducerNode() || !isScalar()) && absArg->canComputeBatchWithCuda(); }
108#endif
109
110 RooAbsArg *absArg = nullptr;
112
113 std::shared_ptr<Detail::AbsBuffer> buffer;
114 std::size_t iNode = 0;
115 int remClients = 0;
116 int remServers = 0;
117#ifdef ROOFIT_CUDA
118 bool copyAfterEvaluation = false;
119#endif
120 bool fromArrayInput = false;
121 bool isVariable = false;
122 bool isDirty = true;
123 bool isCategory = false;
124 bool hasLogged = false;
125 std::size_t outputSize = 1;
126 std::size_t lastSetValCount = std::numeric_limits<std::size_t>::max();
127 double scalarBuffer = 0.0;
128 std::vector<NodeInfo *> serverInfos;
129 std::vector<NodeInfo *> clientInfos;
130
131#ifdef ROOFIT_CUDA
132 std::unique_ptr<RooFit::Detail::CudaInterface::CudaEvent> event;
133 std::unique_ptr<RooFit::Detail::CudaInterface::CudaStream> stream;
134
135 /// Check the servers of a node that has been computed and release it's resources
136 /// if they are no longer needed.
137 void decrementRemainingClients()
138 {
139 if (--remClients == 0 && !fromArrayInput) {
140 buffer.reset();
141 }
142 }
143#endif // ROOFIT_CUDA
144};
145
146/// Construct a new Evaluator. The constructor analyzes and saves metadata about the graph,
147/// useful for the evaluation of it that will be done later. In case the CUDA mode is selected,
148/// there's also some CUDA-related initialization.
149///
150/// \param[in] absReal The RooAbsReal object that sits on top of the
151/// computation graph that we want to evaluate.
152/// \param[in] useGPU Whether the evaluation should be preferably done on the GPU.
153Evaluator::Evaluator(const RooAbsReal &absReal, bool useGPU)
154 : _bufferManager{std::make_unique<Detail::BufferManager>()},
155 _topNode{const_cast<RooAbsReal &>(absReal)},
156 _useGPU{useGPU}
157{
158#ifndef ROOFIT_CUDA
159 if (useGPU) {
160 throw std::runtime_error("Can't create Evaluator in CUDA mode because ROOT was compiled without CUDA support!");
161 }
162#endif
163 // Some checks and logging of used architectures
164 logArchitectureInfo(_useGPU);
165
166 RooArgSet serverSet;
168
169 _dataMapCPU.resize(serverSet.size());
170#ifdef ROOFIT_CUDA
171 _dataMapCUDA.resize(serverSet.size());
172#endif
173
174 std::map<RooFit::Detail::DataKey, NodeInfo *> nodeInfos;
175
176 // Fill the ordered nodes list and initialize the node info structs.
177 _nodes.reserve(serverSet.size());
178 std::size_t iNode = 0;
179 for (RooAbsArg *arg : serverSet) {
180
181 _nodes.emplace_back();
182 auto &nodeInfo = _nodes.back();
183 nodeInfo.absArg = arg;
184 nodeInfo.originalOperMode = arg->operMode();
185 nodeInfo.iNode = iNode;
186 nodeInfos[arg] = &nodeInfo;
187
188 if (dynamic_cast<RooRealVar const *>(arg)) {
189 nodeInfo.isVariable = true;
190 } else {
191 arg->setDataToken(iNode);
192 }
193 if (dynamic_cast<RooAbsCategory const *>(arg)) {
194 nodeInfo.isCategory = true;
195 }
196
197 ++iNode;
198 }
199
200 for (NodeInfo &info : _nodes) {
201 info.serverInfos.reserve(info.absArg->servers().size());
202 for (RooAbsArg *server : info.absArg->servers()) {
203 if (server->isValueServer(*info.absArg)) {
204 auto *serverInfo = nodeInfos.at(server);
205 info.serverInfos.emplace_back(serverInfo);
206 serverInfo->clientInfos.emplace_back(&info);
207 }
208 }
209 }
210
212
213#ifdef ROOFIT_CUDA
214 if (_useGPU) {
215 // create events and streams for every node
216 for (auto &info : _nodes) {
217 info.event = std::make_unique<CudaInterface::CudaEvent>(false);
218 info.stream = std::make_unique<CudaInterface::CudaStream>();
220 cfg.setCudaStream(info.stream.get());
221 _dataMapCUDA.setConfig(info.absArg, cfg);
222 }
223 }
224#endif
225}
226
227/// If there are servers with the same name that got de-duplicated in the
228/// `_nodes` list, we need to set their data tokens too. We find such nodes by
229/// visiting the servers of every known node.
231{
232 for (NodeInfo &info : _nodes) {
233 std::size_t iValueServer = 0;
234 for (RooAbsArg *server : info.absArg->servers()) {
235 if (server->isValueServer(*info.absArg)) {
236 auto *knownServer = info.serverInfos[iValueServer]->absArg;
237 if (knownServer->hasDataToken()) {
238 server->setDataToken(knownServer->dataToken());
239 }
240 ++iValueServer;
241 }
242 }
243 }
244}
245
246void Evaluator::setInput(std::string const &name, std::span<const double> inputArray, bool isOnDevice)
247{
248 if (isOnDevice && !_useGPU) {
249 throw std::runtime_error("Evaluator can only take device array as input in CUDA mode!");
250 }
251
252 auto namePtr = RooNameReg::ptr(name.c_str());
253
254 // Iterate over the given data spans and add them to the data map. Check if
255 // they are used in the computation graph. If yes, add the span to the data
256 // map and set the node info accordingly.
257 std::size_t iNode = 0;
258 for (auto &info : _nodes) {
259 const bool fromArrayInput = info.absArg->namePtr() == namePtr;
260 if (fromArrayInput) {
261 info.fromArrayInput = true;
262 info.absArg->setDataToken(iNode);
263 info.outputSize = inputArray.size();
264 if (_useGPU) {
265#ifdef ROOFIT_CUDA
266 if (info.outputSize == 1) {
267 // Scalar observables from the data don't need to be copied to the GPU
268 _dataMapCPU.set(info.absArg, inputArray);
269 _dataMapCUDA.set(info.absArg, inputArray);
270 } else {
271 if (_useGPU) {
272 // For simplicity, we put the data on both host and device for
273 // now. This could be optimized by inspecting the clients of the
274 // variable.
275 if (isOnDevice) {
276 _dataMapCUDA.set(info.absArg, inputArray);
277 auto gpuSpan = _dataMapCUDA.at(info.absArg);
278 info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size());
279 CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size());
280 _dataMapCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()});
281 } else {
282 _dataMapCPU.set(info.absArg, inputArray);
283 auto cpuSpan = _dataMapCPU.at(info.absArg);
284 info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size());
285 CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size());
286 _dataMapCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()});
287 }
288 } else {
289 _dataMapCPU.set(info.absArg, inputArray);
290 }
291 }
292#endif
293 } else {
294 _dataMapCPU.set(info.absArg, inputArray);
295 }
296 }
297 info.isDirty = !info.fromArrayInput;
298 ++iNode;
299 }
300
302}
303
305{
306 std::map<RooFit::Detail::DataKey, std::size_t> sizeMap;
307 for (auto &info : _nodes) {
308 if (info.fromArrayInput) {
309 sizeMap[info.absArg] = info.outputSize;
310 } else {
311 // any buffer for temporary results is invalidated by resetting the output sizes
312 info.buffer.reset();
313 }
314 }
315
316 auto outputSizeMap = RooFit::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) {
317 auto found = sizeMap.find(key);
318 return found != sizeMap.end() ? found->second : 0;
319 });
320
321 for (auto &info : _nodes) {
322 info.outputSize = outputSizeMap.at(info.absArg);
323
324 // In principle we don't need dirty flag propagation because the driver
325 // takes care of deciding which node needs to be re-evaluated. However,
326 // disabling it also for scalar mode results in very long fitting times
327 // for specific models (test 14 in stressRooFit), which still needs to be
328 // understood. TODO.
329 if (!info.isScalar()) {
330 setOperMode(info.absArg, RooAbsArg::ADirty);
331 } else {
332 setOperMode(info.absArg, info.originalOperMode);
333 }
334 }
335
336#ifdef ROOFIT_CUDA
337 if (_useGPU) {
338 markGPUNodes();
339 }
340#endif
341
343}
344
346{
347 for (auto &info : _nodes) {
348 info.absArg->resetDataToken();
349 }
350}
351
353{
354 using namespace Detail;
355
356 auto nodeAbsReal = static_cast<RooAbsReal const *>(node);
357
358 const std::size_t nOut = info.outputSize;
359
360 double *buffer = nullptr;
361 if (nOut == 1) {
362 buffer = &info.scalarBuffer;
363#ifdef ROOFIT_CUDA
364 if (_useGPU) {
365 _dataMapCUDA.set(node, {buffer, nOut});
366 }
367#endif
368 } else {
369#ifdef ROOFIT_CUDA
370 if (!info.hasLogged && _useGPU) {
371 RooAbsArg const &arg = *info.absArg;
372 oocoutI(&arg, FastEvaluations) << "The argument " << arg.ClassName() << "::" << arg.GetName()
373 << " could not be evaluated on the GPU because the class doesn't support it. "
374 "Consider requesting or implementing it to benefit from a speed up."
375 << std::endl;
376 info.hasLogged = true;
377 }
378#endif
379 if (!info.buffer) {
380#ifdef ROOFIT_CUDA
381 info.buffer = info.copyAfterEvaluation ? _bufferManager->makePinnedBuffer(nOut, info.stream.get())
382 : _bufferManager->makeCpuBuffer(nOut);
383#else
384 info.buffer = _bufferManager->makeCpuBuffer(nOut);
385#endif
386 }
387 buffer = info.buffer->cpuWritePtr();
388 }
389 _dataMapCPU.set(node, {buffer, nOut});
390 nodeAbsReal->computeBatch(buffer, nOut, _dataMapCPU);
391#ifdef ROOFIT_CUDA
392 if (info.copyAfterEvaluation) {
393 _dataMapCUDA.set(node, {info.buffer->gpuReadPtr(), nOut});
394 if (info.event) {
395 CudaInterface::cudaEventRecord(*info.event, *info.stream);
396 }
397 }
398#endif
399}
400
401/// Process a variable in the computation graph. This is a separate non-inlined
402/// function such that we can see in performance profiles how long this takes.
404{
405 RooAbsArg *node = nodeInfo.absArg;
406 auto *var = static_cast<RooRealVar const *>(node);
407 if (nodeInfo.lastSetValCount != var->valueResetCounter()) {
408 nodeInfo.lastSetValCount = var->valueResetCounter();
409 for (NodeInfo *clientInfo : nodeInfo.clientInfos) {
410 clientInfo->isDirty = true;
411 }
412 computeCPUNode(node, nodeInfo);
413 nodeInfo.isDirty = false;
414 }
415}
416
417/// Flags all the clients of a given node dirty. This is a separate non-inlined
418/// function such that we can see in performance profiles how long this takes.
420{
421 for (NodeInfo *clientInfo : nodeInfo.clientInfos) {
422 clientInfo->isDirty = true;
423 }
424}
425
426/// Returns the value of the top node in the computation graph
427std::span<const double> Evaluator::run()
428{
431
433
434#ifdef ROOFIT_CUDA
435 if (_useGPU) {
436 return getValHeterogeneous();
437 }
438#endif
439
440 for (auto &nodeInfo : _nodes) {
441 if (!nodeInfo.fromArrayInput) {
442 if (nodeInfo.isVariable) {
443 processVariable(nodeInfo);
444 } else {
445 if (nodeInfo.isDirty) {
446 setClientsDirty(nodeInfo);
447 computeCPUNode(nodeInfo.absArg, nodeInfo);
448 nodeInfo.isDirty = false;
449 }
450 }
451 }
452 }
453
454 // return the final output
455 return _dataMapCPU.at(&_topNode);
456}
457
/// Returns the value of the top node in the computation graph
///
/// Heterogeneous CPU/GPU scheduling loop for CUDA fits: nodes whose servers
/// are all done are dispatched to the GPU (if eligible) or computed on the
/// CPU, until the top node has been evaluated. The remServers counter doubles
/// as a state flag: >0 waiting, 0 ready, -1 running on GPU, -2 done.
std::span<const double> Evaluator::getValHeterogeneous()
{
#ifdef ROOFIT_CUDA
   // Reset per-run scheduling state and drop stale temporary buffers.
   for (auto &info : _nodes) {
      info.remClients = info.clientInfos.size();
      info.remServers = info.serverInfos.size();
      if (info.buffer && !info.fromArrayInput) {
         info.buffer.reset();
      }
   }

   // find initial GPU nodes and assign them to GPU
   for (auto &info : _nodes) {
      if (info.remServers == 0 && info.computeInGPU()) {
         assignToGPU(info);
      }
   }

   // The node list is topologically ordered, so the last entry is the top node.
   NodeInfo const &topNodeInfo = _nodes.back();
   while (topNodeInfo.remServers != -2) {
      // find finished GPU nodes
      for (auto &info : _nodes) {
         if (info.remServers == -1 && !info.stream->isActive()) {
            info.remServers = -2;
            // Decrement number of remaining servers for clients and start GPU computations
            for (auto *infoClient : info.clientInfos) {
               --infoClient->remServers;
               if (infoClient->computeInGPU() && infoClient->remServers == 0) {
                  assignToGPU(*infoClient);
               }
            }
            // Servers may now release their buffers if no client needs them anymore.
            for (auto *serverInfo : info.serverInfos) {
               serverInfo->decrementRemainingClients();
            }
         }
      }

      // find next CPU node
      auto it = _nodes.begin();
      for (; it != _nodes.end(); it++) {
         if (it->remServers == 0 && !it->computeInGPU())
            break;
      }

      // if no CPU node available sleep for a while to save CPU usage
      if (it == _nodes.end()) {
         std::this_thread::sleep_for(std::chrono::milliseconds(1));
         continue;
      }

      // compute next CPU node
      NodeInfo &info = *it;
      RooAbsArg const *node = info.absArg;
      info.remServers = -2; // so that it doesn't get picked again

      if (!info.fromArrayInput) {
         computeCPUNode(node, info);
      }

      // Assign the clients that are computed on the GPU
      for (auto *infoClient : info.clientInfos) {
         if (--infoClient->remServers == 0 && infoClient->computeInGPU()) {
            assignToGPU(*infoClient);
         }
      }
      for (auto *serverInfo : info.serverInfos) {
         serverInfo->decrementRemainingClients();
      }
   }

   // return the final value
   return _dataMapCUDA.at(&_topNode);
#else
   // Doesn't matter what we do here, because it's a private function that's
   // not called when RooFit is not built with CUDA support.
   return {};
#endif // ROOFIT_CUDA
}
537
538/// Assign a node to be computed in the GPU. Scan it's clients and also assign them
539/// in case they only depend on GPU nodes.
541{
542 using namespace Detail;
543
544 info.remServers = -1;
545
546#ifdef ROOFIT_CUDA
547 auto node = static_cast<RooAbsReal const *>(info.absArg);
548
549 // wait for every server to finish
550 for (auto *infoServer : info.serverInfos) {
551 if (infoServer->event)
552 info.stream->waitForEvent(*infoServer->event);
553 }
554
555 const std::size_t nOut = info.outputSize;
556
557 double *buffer = nullptr;
558 if (nOut == 1) {
559 buffer = &info.scalarBuffer;
560 _dataMapCPU.set(node, {buffer, nOut});
561 } else {
562 info.buffer = info.copyAfterEvaluation ? _bufferManager->makePinnedBuffer(nOut, info.stream.get())
563 : _bufferManager->makeGpuBuffer(nOut);
564 buffer = info.buffer->gpuWritePtr();
565 }
566 _dataMapCUDA.set(node, {buffer, nOut});
567 node->computeBatch(buffer, nOut, _dataMapCUDA);
568 CudaInterface::cudaEventRecord(*info.event, *info.stream);
569 if (info.copyAfterEvaluation) {
570 _dataMapCPU.set(node, {info.buffer->cpuReadPtr(), nOut});
571 }
572#endif // ROOFIT_CUDA
573}
574
575/// Decides which nodes are assigned to the GPU in a CUDA fit.
577{
578#ifdef ROOFIT_CUDA
579 for (auto &info : _nodes) {
580 info.copyAfterEvaluation = false;
581 // scalar nodes don't need copying
582 if (!info.isScalar()) {
583 for (auto *clientInfo : info.clientInfos) {
584 if (info.computeInGPU() != clientInfo->computeInGPU()) {
585 info.copyAfterEvaluation = true;
586 break;
587 }
588 }
589 }
590 }
591#endif // ROOFIT_CUDA
592}
593
594/// Temporarily change the operation mode of a RooAbsArg until the
595/// Evaluator gets deleted.
597{
598 if (opMode != arg->operMode()) {
599 _changeOperModeRAIIs.emplace(std::make_unique<ChangeOperModeRAII>(arg, opMode));
600 }
601}
602
603void Evaluator::print(std::ostream &os) const
604{
605 std::cout << "--- RooFit BatchMode evaluation ---\n";
606
607 std::vector<int> widths{9, 37, 20, 9, 10, 20};
608
609 auto printElement = [&](int iCol, auto const &t) {
610 const char separator = ' ';
611 os << separator << std::left << std::setw(widths[iCol]) << std::setfill(separator) << t;
612 os << "|";
613 };
614
615 auto printHorizontalRow = [&]() {
616 int n = 0;
617 for (int w : widths) {
618 n += w + 2;
619 }
620 for (int i = 0; i < n; i++) {
621 os << '-';
622 }
623 os << "|\n";
624 };
625
626 printHorizontalRow();
627
628 os << "|";
629 printElement(0, "Index");
630 printElement(1, "Name");
631 printElement(2, "Class");
632 printElement(3, "Size");
633 printElement(4, "From Data");
634 printElement(5, "1st value");
635 std::cout << "\n";
636
637 printHorizontalRow();
638
639 for (std::size_t iNode = 0; iNode < _nodes.size(); ++iNode) {
640 auto &nodeInfo = _nodes[iNode];
641 RooAbsArg *node = nodeInfo.absArg;
642
643 auto span = _dataMapCPU.at(node);
644
645 os << "|";
646 printElement(0, iNode);
647 printElement(1, node->GetName());
648 printElement(2, node->ClassName());
649 printElement(3, nodeInfo.outputSize);
650 printElement(4, nodeInfo.fromArrayInput);
651 printElement(5, span[0]);
652
653 std::cout << "\n";
654 }
655
656 printHorizontalRow();
657}
658
659/// Gets all the parameters of the RooAbsReal. This is in principle not
660/// necessary, because we can always ask the RooAbsReal itself, but the
661/// Evaluator has the cached information to get the answer quicker.
662/// Therefore, this is not meant to be used in general, just where it matters.
663/// \warning If we find another solution to get the parameters efficiently,
664/// this function might be removed without notice.
666{
667 RooArgSet parameters;
668 for (auto &nodeInfo : _nodes) {
669 if (!nodeInfo.fromArrayInput && nodeInfo.isVariable) {
670 parameters.add(*nodeInfo.absArg);
671 }
672 }
673 // Just like in RooAbsArg::getParameters(), we sort the parameters alphabetically.
674 parameters.sort();
675 return parameters;
676}
677
678} // namespace RooFit
#define oocoutI(o, a)
#define oocxcoutI(o, a)
char name[80]
Definition TGX11.cxx:110
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:79
virtual bool canComputeBatchWithCuda() const
Definition RooAbsArg.h:576
virtual bool isReducerNode() const
Definition RooAbsArg.h:577
OperMode operMode() const
Query the operation mode of this node.
Definition RooAbsArg.h:484
A space to attach TBranches.
virtual bool add(const RooAbsArg &var, bool silent=false)
Add the specified argument to list.
Storage_t::size_type size() const
void sort(bool reverse=false)
Sort collection using std::sort and name comparison.
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
Definition DataMap.cxx:22
void set(RooAbsArg const *arg, std::span< const double > const &span)
Definition DataMap.h:91
void setConfig(RooAbsArg const *arg, RooBatchCompute::Config const &config)
Definition DataMap.cxx:32
void resize(std::size_t n)
Definition DataMap.cxx:49
void computeCPUNode(const RooAbsArg *node, NodeInfo &info)
void setOperMode(RooAbsArg *arg, RooAbsArg::OperMode opMode)
Temporarily change the operation mode of a RooAbsArg until the Evaluator gets deleted.
RooFit::Detail::DataMap _dataMapCPU
Definition Evaluator.h:66
std::span< const double > run()
Returns the value of the top node in the computation graph.
void markGPUNodes()
Decides which nodes are assigned to the GPU in a CUDA fit.
const bool _useGPU
Definition Evaluator.h:63
std::vector< NodeInfo > _nodes
Definition Evaluator.h:68
bool _needToUpdateOutputSizes
Definition Evaluator.h:65
void print(std::ostream &os) const
std::span< const double > getValHeterogeneous()
Returns the value of the top node in the computation graph.
RooFit::Detail::DataMap _dataMapCUDA
Definition Evaluator.h:67
RooArgSet getParameters() const
Gets all the parameters of the RooAbsReal.
void setInput(std::string const &name, std::span< const double > inputArray, bool isOnDevice)
void assignToGPU(NodeInfo &info)
Assign a node to be computed in the GPU.
void syncDataTokens()
If there are servers with the same name that got de-duplicated in the _nodes list,...
void processVariable(NodeInfo &nodeInfo)
Process a variable in the computation graph.
std::unique_ptr< Detail::BufferManager > _bufferManager
Definition Evaluator.h:61
std::stack< std::unique_ptr< ChangeOperModeRAII > > _changeOperModeRAIIs
Definition Evaluator.h:69
RooAbsReal & _topNode
Definition Evaluator.h:62
Evaluator(const RooAbsReal &absReal, bool useGPU=false)
Construct a new Evaluator.
void setClientsDirty(NodeInfo &nodeInfo)
Flags all the clients of a given node dirty.
static RooMsgService & instance()
Return reference to singleton instance.
static const TNamed * ptr(const char *stringPtr)
Return a unique TNamed pointer for given C++ string.
RooRealVar represents a variable that can be changed from the outside.
Definition RooRealVar.h:37
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
virtual const char * ClassName() const
Returns name of class to which the object belongs.
Definition TObject.cxx:207
const Int_t n
Definition legend1.C:16
std::string cpuArchitectureName()
Architecture cpuArchitecture()
void cudaEventRecord(CudaEvent &, CudaStream &)
Records a CUDA event.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
@ FastEvaluations
void getSortedComputationGraph(RooAbsArg const &func, RooArgSet &out)
A struct used by the Evaluator to store information on the RooAbsArgs in the computation graph.
RooAbsArg * absArg
bool isScalar() const
std::size_t iNode
std::size_t lastSetValCount
std::vector< NodeInfo * > serverInfos
std::shared_ptr< Detail::AbsBuffer > buffer
RooAbsArg::OperMode originalOperMode
std::size_t outputSize
std::vector< NodeInfo * > clientInfos