Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
ComputeFunctions.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, Summer 2019
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file ComputeFunctions.cxx
15
16This file contains vectorizable computation functions for PDFs and other Roofit objects.
17The same source file can also be compiled with nvcc. All functions have a single `Batches`
18object as an argument passed by value, which contains all the information necessary for the
19computation. In case of cuda computations, the loops have a step (stride) the size of the grid
20which allows for reusing the same code as the cpu implementations, easier debugging and in terms
21of performance, maximum memory coalescing. For more details, see
22https://developer.nvidia.com/blog/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
23**/
24
#include "RooBatchCompute.h"
#include "RooHeterogeneousMath.h"
#include "RooNaNPacker.h"
#include "RooVDTHeaders.h"
#include "Batches.h"

#include <TMath.h>

#include <cmath>
#include <vector>
35
36#ifdef __CUDACC__
37#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
38#define STEP blockDim.x *gridDim.x
39#else
40#define BEGIN 0
41#define STEP 1
42#endif // #ifdef __CUDACC__
43
44namespace RooBatchCompute {
45namespace RF_ARCH {
46
48{
49 const int nPdfs = batches.nExtra;
50 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
51 batches.output[i] = batches.extra[0] * batches.args[0][i];
52 }
53 for (int pdf = 1; pdf < nPdfs; pdf++) {
54 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
55 batches.output[i] += batches.extra[pdf] * batches.args[pdf][i];
56 }
57 }
58}
59
61{
62 Batch m = batches.args[0];
63 Batch m0 = batches.args[1];
64 Batch c = batches.args[2];
65 Batch p = batches.args[3];
66 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
67 const double t = m[i] / m0[i];
68 const double u = 1 - t * t;
69 batches.output[i] = c[i] * u + p[i] * fast_log(u);
70 }
71 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
72 if (m[i] >= m0[i]) {
73 batches.output[i] = 0.0;
74 } else {
75 batches.output[i] = m[i] * fast_exp(batches.output[i]);
76 }
77 }
78}
79
81{
82 Batch coef0 = batches.args[0];
83 Batch coef1 = batches.args[1];
84 Batch tagFlav = batches.args[2];
85 Batch delMistag = batches.args[3];
86 Batch mixState = batches.args[4];
87 Batch mistag = batches.args[5];
88
89 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
90 batches.output[i] =
91 coef0[i] * (1.0 - tagFlav[i] * delMistag[0]) + coef1[i] * (mixState[i] * (1.0 - 2.0 * mistag[0]));
92 }
93}
94
96{
97 const int nCoef = batches.nExtra - 2;
98 const int degree = nCoef - 1;
99 const double xmin = batches.extra[nCoef];
100 const double xmax = batches.extra[nCoef + 1];
101 Batch xData = batches.args[0];
102
103 // apply binomial coefficient in-place so we don't have to allocate new memory
104 double binomial = 1.0;
105 for (int k = 0; k < nCoef; k++) {
106 batches.extra[k] = batches.extra[k] * binomial;
107 binomial = (binomial * (degree - k)) / (k + 1);
108 }
109
110 if (STEP == 1) {
111 double X[bufferSize];
112 double _1_X[bufferSize];
113 double powX[bufferSize];
114 double pow_1_X[bufferSize];
115 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
116 powX[i] = pow_1_X[i] = 1.0;
117 X[i] = (xData[i] - xmin) / (xmax - xmin);
118 _1_X[i] = 1 - X[i];
119 batches.output[i] = 0.0;
120 }
121
122 // raising 1-x to the power of degree
123 for (int k = 2; k <= degree; k += 2) {
124 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
125 pow_1_X[i] *= _1_X[i] * _1_X[i];
126 }
127
128 if (degree % 2 == 1) {
129 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
130 pow_1_X[i] *= _1_X[i];
131 }
132
133 // inverting 1-x ---> 1/(1-x)
134 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
135 _1_X[i] = 1 / _1_X[i];
136
137 for (int k = 0; k < nCoef; k++) {
138 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
139 batches.output[i] += batches.extra[k] * powX[i] * pow_1_X[i];
140
141 // calculating next power for x and 1-x
142 powX[i] *= X[i];
143 pow_1_X[i] *= _1_X[i];
144 }
145 }
146 } else {
147 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
148 batches.output[i] = 0.0;
149 const double X = (xData[i] - xmin) / (xmax - xmin);
150 double powX = 1.0;
151 double pow_1_X = 1.0;
152 for (int k = 1; k <= degree; k++)
153 pow_1_X *= 1 - X;
154 const double _1_X = 1 / (1 - X);
155 for (int k = 0; k < nCoef; k++) {
156 batches.output[i] += batches.extra[k] * powX * pow_1_X;
157 powX *= X;
158 pow_1_X *= _1_X;
159 }
160 }
161 }
162
163 // reset extraArgs values so we don't mutate the Batches object
164 binomial = 1.0;
165 for (int k = 0; k < nCoef; k++) {
166 batches.extra[k] = batches.extra[k] / binomial;
167 binomial = (binomial * (degree - k)) / (k + 1);
168 }
169}
170
172{
173 Batch X = batches.args[0];
174 Batch M = batches.args[1];
175 Batch SL = batches.args[2];
176 Batch SR = batches.args[3];
177 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
178 double arg = X[i] - M[i];
179 if (arg < 0) {
180 arg /= SL[i];
181 } else {
182 arg /= SR[i];
183 }
184 batches.output[i] = fast_exp(-0.5 * arg * arg);
185 }
186}
187
189{
190 Batch X = batches.args[0];
191 Batch M = batches.args[1];
192 Batch W = batches.args[2];
193 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
194 const double arg = X[i] - M[i];
195 batches.output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
196 }
197}
198
200{
201 Batch X = batches.args[0];
202 Batch XP = batches.args[1];
203 Batch SP = batches.args[2];
204 Batch XI = batches.args[3];
205 Batch R1 = batches.args[4];
206 Batch R2 = batches.args[5];
207 const double r3 = log(2.0);
208 const double r6 = exp(-6.0);
209 const double r7 = 2 * sqrt(2 * log(2.0));
210
211 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
212 const double r1 = XI[i] * fast_isqrt(XI[i] * XI[i] + 1);
213 const double r4 = 1 / fast_isqrt(XI[i] * XI[i] + 1);
214 const double hp = 1 / (SP[i] * r7);
215 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
216 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
217
218 double r5 = 1.0;
219 if (XI[i] > r6 || XI[i] < -r6)
220 r5 = XI[i] / fast_log(r4 + XI[i]);
221
222 double factor = 1;
223 double y = X[i] - x1;
224 double Yp = XP[i] - x1;
225 double yi = r4 - XI[i];
226 double rho = R1[i];
227 if (X[i] >= x2) {
228 factor = -1;
229 y = X[i] - x2;
230 Yp = XP[i] - x2;
231 yi = r4 + XI[i];
232 rho = R2[i];
233 }
234
235 batches.output[i] = rho * y * y / Yp / Yp - r3 + factor * 4 * r3 * y * hp * r5 * r4 / yi / yi;
236 if (X[i] >= x1 && X[i] < x2) {
237 batches.output[i] =
238 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / fast_log(1 + 2 * XI[i] * (XI[i] - r4));
239 batches.output[i] *= -batches.output[i] * r3;
240 }
241 if (X[i] >= x1 && X[i] < x2 && XI[i] < r6 && XI[i] > -r6)
242 batches.output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
243 }
244 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
245 batches.output[i] = fast_exp(batches.output[i]);
246}
247
249{
250 Batch M = batches.args[0];
251 Batch M0 = batches.args[1];
252 Batch S = batches.args[2];
253 Batch A = batches.args[3];
254 Batch N = batches.args[4];
255 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
256 const double t = (M[i] - M0[i]) / S[i];
257 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i])) {
258 batches.output[i] = -0.5 * t * t;
259 } else {
260 batches.output[i] = N[i] / (N[i] - A[i] * A[i] - A[i] * t);
261 batches.output[i] = fast_log(batches.output[i]);
262 batches.output[i] *= N[i];
263 batches.output[i] -= 0.5 * A[i] * A[i];
264 }
265 }
266 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
267 batches.output[i] = fast_exp(batches.output[i]);
268}
269
271{
272 Batch xData = batches.args[0];
273 const int nCoef = batches.nExtra - 2;
274 const double xmin = batches.extra[nCoef];
275 const double xmax = batches.extra[nCoef + 1];
276
277 if (STEP == 1) {
278 double prev[bufferSize][2];
279 double X[bufferSize];
280
281 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
282 // set a0-->prev[i][0] and a1-->prev[i][1]
283 // and x tranfsformed to range[-1..1]-->X[i]
284 prev[i][0] = batches.output[i] = 1.0;
285 prev[i][1] = X[i] = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
286 }
287 for (int k = 0; k < nCoef; k++) {
288 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
289 batches.output[i] += prev[i][1] * batches.extra[k];
290
291 // compute next order
292 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
293 prev[i][0] = prev[i][1];
294 prev[i][1] = next;
295 }
296 }
297 } else {
298 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
299 double prev0 = 1.0;
300 double prev1 = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
301 double X = prev1;
302 batches.output[i] = 1.0;
303 for (int k = 0; k < nCoef; k++) {
304 batches.output[i] += prev1 * batches.extra[k];
305
306 // compute next order
307 const double next = 2 * X * prev1 - prev0;
308 prev0 = prev1;
309 prev1 = next;
310 }
311 }
312 }
313}
314
316{
317 Batch X = batches.args[0];
318 const double ndof = batches.extra[0];
319 const double gamma = 1 / std::tgamma(ndof / 2.0);
320 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
321 batches.output[i] = gamma;
322
323 constexpr double ln2 = 0.693147180559945309417232121458;
324 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
325 double arg = (ndof - 2) * fast_log(X[i]) - X[i] - ndof * ln2;
326 batches.output[i] *= fast_exp(0.5 * arg);
327 }
328}
329
331{
332 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
333 batches.output[i] = 0.0 + (batches.args[0][i] == 1.0);
334 }
335}
336
338{
339 Batch DM = batches.args[0];
340 Batch DM0 = batches.args[1];
341 Batch C = batches.args[2];
342 Batch A = batches.args[3];
343 Batch B = batches.args[4];
344 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
345 const double ratio = DM[i] / DM0[i];
346 const double arg1 = (DM0[i] - DM[i]) / C[i];
347 const double arg2 = A[i] * fast_log(ratio);
348 batches.output[i] = (1 - fast_exp(arg1)) * fast_exp(arg2) + B[i] * (ratio - 1);
349 }
350
351 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
352 if (batches.output[i] < 0)
353 batches.output[i] = 0;
354 }
355}
356
358{
359 int lowestOrder = batches.extra[0];
360 int nTerms = batches.extra[1];
361 auto x = batches.args[0];
362
363 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
364 batches.output[i] = 0.0;
365 double xTmp = std::pow(x[i], lowestOrder);
366 for (int k = 0; k < nTerms; ++k) {
367 batches.output[i] += batches.args[k + 1][i] * xTmp;
368 xTmp *= x[i];
369 }
370 batches.output[i] = std::exp(batches.output[i]);
371 }
372}
373
375{
376 Batch x = batches.args[0];
377 Batch c = batches.args[1];
378 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
379 batches.output[i] = fast_exp(x[i] * c[i]);
380 }
381}
382
384{
385 Batch x = batches.args[0];
386 Batch c = batches.args[1];
387 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
388 batches.output[i] = fast_exp(-x[i] * c[i]);
389 }
390}
391
393{
394 Batch X = batches.args[0];
395 Batch G = batches.args[1];
396 Batch B = batches.args[2];
397 Batch M = batches.args[3];
398 double gamma = -std::lgamma(G[0]);
399 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
400 if (X[i] == M[i]) {
401 batches.output[i] = int(G[i] == 1.0) / B[i];
402 } else if (G._isVector) {
403 batches.output[i] = -std::lgamma(G[i]);
404 } else {
405 batches.output[i] = gamma;
406 }
407 }
408
409 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
410 if (X[i] != M[i]) {
411 const double invBeta = 1 / B[i];
412 double arg = (X[i] - M[i]) * invBeta;
413 batches.output[i] -= arg;
414 arg = fast_log(arg);
415 batches.output[i] += arg * (G[i] - 1);
416 batches.output[i] = fast_exp(batches.output[i]);
417 batches.output[i] *= invBeta;
418 }
419 }
420}
421
423{
424 const double root2 = std::sqrt(2.);
425 const double root2pi = std::sqrt(2. * std::atan2(0., -1.));
426
427 const bool isMinus = batches.extra[0] < 0.0;
428 const bool isPlus = batches.extra[0] > 0.0;
429
430 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
431
432 const double x = batches.args[0][i];
433 const double mean = batches.args[1][i] * batches.args[2][i];
434 const double sigma = batches.args[3][i] * batches.args[4][i];
435 const double tau = batches.args[5][i];
436
437 if (tau == 0.0) {
438 // Straight Gaussian, used for unconvoluted PDF or expBasis with 0 lifetime
439 double xprime = (x - mean) / sigma;
440 double result = std::exp(-0.5 * xprime * xprime) / (sigma * root2pi);
441 if (!isMinus && !isPlus)
442 result *= 2;
443 batches.output[i] = result;
444 } else {
445 // Convolution with exp(-t/tau)
446 const double xprime = (x - mean) / tau;
447 const double c = sigma / (root2 * tau);
448 const double u = xprime / (2 * c);
449
450 double result = 0.0;
451 if (!isMinus)
452 result += RooHeterogeneousMath::evalCerf(0, -u, c).real();
453 if (!isPlus)
454 result += RooHeterogeneousMath::evalCerf(0, u, c).real();
455 batches.output[i] = result;
456 }
457 }
458}
459
461{
462 auto x = batches.args[0];
463 auto mean = batches.args[1];
464 auto sigma = batches.args[2];
465 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
466 const double arg = x[i] - mean[i];
467 const double halfBySigmaSq = -0.5 / (sigma[i] * sigma[i]);
468 batches.output[i] = fast_exp(arg * arg * halfBySigmaSq);
469 }
470}
471
473{
474 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
475 batches.output[i] = batches.args[0][i];
476 }
477}
478
480{
481 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
482 batches.output[i] = -fast_log(batches.args[0][i]);
483 // Multiply by weights if they exist
484 if (batches.extra[0]) {
485 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
486 batches.output[i] *= batches.args[1][i];
487 }
488}
489
491{
492 Batch mass = batches.args[0];
493 Batch mu = batches.args[1];
494 Batch lambda = batches.args[2];
495 Batch gamma = batches.args[3];
496 Batch delta = batches.args[4];
497 const double sqrtTwoPi = std::sqrt(TMath::TwoPi());
498 const double massThreshold = batches.extra[0];
499
500 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
501 const double arg = (mass[i] - mu[i]) / lambda[i];
502#ifdef R__HAS_VDT
503 const double asinh_arg = fast_log(arg + 1 / fast_isqrt(arg * arg + 1));
504#else
505 const double asinh_arg = asinh(arg);
506#endif
507 const double expo = gamma[i] + delta[i] * asinh_arg;
508 const double result =
509 delta[i] * fast_exp(-0.5 * expo * expo) * fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
510
511 const double passThrough = mass[i] >= massThreshold;
512 batches.output[i] = result * passThrough;
513 }
514}
515
516/* Actual computation of Landau(x,mean,sigma) in a vectorization-friendly way
517 * Code copied from function landau_pdf (math/mathcore/src/PdfFuncMathCore.cxx)
518 * and rewritten to enable vectorization.
519 */
521{
522 auto case0 = [](double x) {
523 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
524 const double u = fast_exp(x + 1.0);
525 return 0.3989422803 * fast_exp(-1 / u - 0.5 * (x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
526 };
527 auto case1 = [](double x) {
528 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
529 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
530 const double u = fast_exp(-x - 1);
531 return fast_exp(-u - 0.5 * (x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * x) * x) * x) * x) /
532 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * x) * x) * x) * x);
533 };
534 auto case2 = [](double x) {
535 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
536 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
537 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * x) * x) * x) * x) /
538 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * x) * x) * x) * x);
539 };
540 auto case3 = [](double x) {
541 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
542 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
543 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * x) * x) * x) * x) /
544 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * x) * x) * x) * x);
545 };
546 auto case4 = [](double x) {
547 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
548 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
549 const double u = 1 / x;
550 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
551 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
552 };
553 auto case5 = [](double x) {
554 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
555 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
556 const double u = 1 / x;
557 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
558 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
559 };
560 auto case6 = [](double x) {
561 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
562 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
563 const double u = 1 / x;
564 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
565 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
566 };
567 auto case7 = [](double x) {
568 const double a2[2] = {-1.845568670, -4.284640743};
569 const double u = 1 / (x - x * fast_log(x) / (x + 1));
570 return u * u * (1 + (a2[0] + a2[1] * u) * u);
571 };
572
573 Batch X = batches.args[0];
574 Batch M = batches.args[1];
575 Batch S = batches.args[2];
576
577 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
578 batches.output[i] = (X[i] - M[i]) / S[i];
579
580 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
581 if (S[i] <= 0.0) {
582 batches.output[i] = 0;
583 } else if (batches.output[i] < -5.5) {
584 batches.output[i] = case0(batches.output[i]);
585 } else if (batches.output[i] < -1.0) {
586 batches.output[i] = case1(batches.output[i]);
587 } else if (batches.output[i] < 1.0) {
588 batches.output[i] = case2(batches.output[i]);
589 } else if (batches.output[i] < 5.0) {
590 batches.output[i] = case3(batches.output[i]);
591 } else if (batches.output[i] < 12.0) {
592 batches.output[i] = case4(batches.output[i]);
593 } else if (batches.output[i] < 50.0) {
594 batches.output[i] = case5(batches.output[i]);
595 } else if (batches.output[i] < 300.) {
596 batches.output[i] = case6(batches.output[i]);
597 } else {
598 batches.output[i] = case7(batches.output[i]);
599 }
600 }
601}
602
604{
605 Batch X = batches.args[0];
606 Batch M0 = batches.args[1];
607 Batch K = batches.args[2];
608 constexpr double rootOf2pi = 2.506628274631000502415765284811;
609 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
610 double lnxOverM0 = fast_log(X[i] / M0[i]);
611 double lnk = fast_log(K[i]);
612 if (lnk < 0)
613 lnk = -lnk;
614 double arg = lnxOverM0 / lnk;
615 arg *= -0.5 * arg;
616 batches.output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
617 }
618}
619
621{
622 Batch X = batches.args[0];
623 Batch M0 = batches.args[1];
624 Batch K = batches.args[2];
625 constexpr double rootOf2pi = 2.506628274631000502415765284811;
626 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
627 double lnxOverM0 = fast_log(X[i]) - M0[i];
628 double lnk = K[i];
629 if (lnk < 0)
630 lnk = -lnk;
631 double arg = lnxOverM0 / lnk;
632 arg *= -0.5 * arg;
633 batches.output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
634 }
635}
636
638{
639 auto rawVal = batches.args[0];
640 auto normVal = batches.args[1];
641
642 int nEvalErrorsType0 = 0;
643 int nEvalErrorsType1 = 0;
644 int nEvalErrorsType2 = 0;
645
646 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
647 double out = 0.0;
648 // batches.output[i] = rawVal[i] / normVar[i];
649 if (normVal[i] < 0. || (normVal[i] == 0. && rawVal[i] != 0)) {
650 // Unreasonable normalisations. A zero integral can be tolerated if the function vanishes, though.
651 out = RooNaNPacker::packFloatIntoNaN(-normVal[i] + (rawVal[i] < 0. ? -rawVal[i] : 0.));
652 nEvalErrorsType0++;
653 } else if (rawVal[i] < 0.) {
654 // The pdf value is less than zero.
655 out = RooNaNPacker::packFloatIntoNaN(-rawVal[i]);
656 nEvalErrorsType1++;
657 } else if (std::isnan(rawVal[i])) {
658 // The pdf value is Not-a-Number.
659 out = rawVal[i];
660 nEvalErrorsType2++;
661 } else {
662 out = (rawVal[i] == 0. && normVal[i] == 0.) ? 0. : rawVal[i] / normVal[i];
663 }
664 batches.output[i] = out;
665 }
666
667 if (nEvalErrorsType0 > 0)
668 batches.extra[0] = batches.extra[0] + nEvalErrorsType0;
669 if (nEvalErrorsType1 > 1)
670 batches.extra[1] = batches.extra[1] + nEvalErrorsType1;
671 if (nEvalErrorsType2 > 2)
672 batches.extra[2] = batches.extra[2] + nEvalErrorsType2;
673}
674
675/* TMath::ASinH(x) needs to be replaced with ln( x + sqrt(x^2+1))
676 * argasinh -> the argument of TMath::ASinH()
677 * argln -> the argument of the logarithm that replaces AsinH
678 * asinh -> the value that the function evaluates to
679 *
680 * ln is the logarithm that was solely present in the initial
681 * formula, that is before the asinh replacement
682 */
684{
685 Batch X = batches.args[0];
686 Batch P = batches.args[1];
687 Batch W = batches.args[2];
688 Batch T = batches.args[3];
689 constexpr double xi = 2.3548200450309494; // 2 Sqrt( Ln(4) )
690 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
691 double argasinh = 0.5 * xi * T[i];
692 double argln = argasinh + 1 / fast_isqrt(argasinh * argasinh + 1);
693 double asinh = fast_log(argln);
694
695 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
696 double ln = fast_log(argln2);
697 batches.output[i] = ln / asinh;
698 batches.output[i] *= -0.125 * xi * xi * batches.output[i];
699 batches.output[i] -= 2.0 / xi / xi * asinh * asinh;
700 }
701
702 // faster if you exponentiate in a separate loop (dark magic!)
703 for (size_t i = BEGIN; i < batches.nEvents; i += STEP)
704 batches.output[i] = fast_exp(batches.output[i]);
705}
706
708{
709 Batch x = batches.args[0];
710 Batch mean = batches.args[1];
711 bool protectNegative = batches.extra[0];
712 bool noRounding = batches.extra[1];
713 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
714 const double x_i = noRounding ? x[i] : floor(x[i]);
715 batches.output[i] = std::lgamma(x_i + 1.);
716 }
717
718 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
719 const double x_i = noRounding ? x[i] : floor(x[i]);
720 const double logMean = fast_log(mean[i]);
721 const double logPoisson = x_i * logMean - mean[i] - batches.output[i];
722 batches.output[i] = fast_exp(logPoisson);
723
724 // Cosmetics
725 if (x_i < 0) {
726 batches.output[i] = 0;
727 } else if (x_i == 0) {
728 batches.output[i] = 1 / fast_exp(mean[i]);
729 }
730
731 if (protectNegative && mean[i] < 0)
732 batches.output[i] = 1.E-3;
733 }
734}
735
737{
738 const int nCoef = batches.extra[0];
739 const std::size_t nEvents = batches.nEvents;
740 Batch x = batches.args[nCoef];
741
742 for (size_t i = BEGIN; i < nEvents; i += STEP) {
743 batches.output[i] = batches.args[nCoef - 1][i];
744 }
745
746 // Indexes are in range 0..nCoef-1 but coefList[nCoef-1] has already been
747 // processed.
748 for (int k = nCoef - 2; k >= 0; k--) {
749 for (size_t i = BEGIN; i < nEvents; i += STEP) {
750 batches.output[i] = batches.args[k][i] + x[i] * batches.output[i];
751 }
752 }
753}
754
756{
757 const int nCoef = batches.extra[0];
758 Batch x = batches.args[0];
759
760 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
761 batches.output[i] = 0.0;
762 for (int k = 0; k < nCoef; ++k) {
763 batches.output[i] += batches.args[2 * k + 1][i] * std::pow(x[i], batches.args[2 * k + 2][i]);
764 }
765 }
766}
767
769{
770 const int nPdfs = batches.extra[0];
771 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
772 batches.output[i] = 1.;
773 }
774 for (int pdf = 0; pdf < nPdfs; pdf++) {
775 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
776 batches.output[i] *= batches.args[pdf][i];
777 }
778 }
779}
780
782{
783 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
784 batches.output[i] = batches.args[0][i] / batches.args[1][i];
785 }
786}
787
789{
790
791 const bool isMinus = batches.extra[0] < 0.0;
792 const bool isPlus = batches.extra[0] > 0.0;
793 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
794 double x = batches.args[0][i];
795 // Enforce sign compatibility
796 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
797 batches.output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]);
798 }
799}
800
802{
803 const bool isMinus = batches.extra[0] < 0.0;
804 const bool isPlus = batches.extra[0] > 0.0;
805 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
806 double x = batches.args[0][i];
807 // Enforce sign compatibility
808 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
809 batches.output[i] =
810 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * fast_sin(x * batches.args[2][i]);
811 }
812}
813
815{
816 const bool isMinus = batches.extra[0] < 0.0;
817 const bool isPlus = batches.extra[0] > 0.0;
818 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
819 double x = batches.args[0][i];
820 // Enforce sign compatibility
821 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
822 batches.output[i] =
823 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * fast_cos(x * batches.args[2][i]);
824 }
825}
826
828{
829 const bool isMinus = batches.extra[0] < 0.0;
830 const bool isPlus = batches.extra[0] > 0.0;
831 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
832 double x = batches.args[0][i];
833 // Enforce sign compatibility
834 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
835 if (isOutOfSign) {
836 batches.output[i] = 0.0;
837 } else {
838 const double tscaled = std::abs(x) / batches.args[1][i];
839 batches.output[i] = fast_exp(-tscaled) * tscaled;
840 }
841 }
842}
843
845{
846 const bool isMinus = batches.extra[0] < 0.0;
847 const bool isPlus = batches.extra[0] > 0.0;
848 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
849 double x = batches.args[0][i];
850 // Enforce sign compatibility
851 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
852 if (isOutOfSign) {
853 batches.output[i] = 0.0;
854 } else {
855 const double tscaled = std::abs(x) / batches.args[1][i];
856 batches.output[i] = fast_exp(-tscaled) * tscaled * tscaled;
857 }
858 }
859}
860
862{
863 const bool isMinus = batches.extra[0] < 0.0;
864 const bool isPlus = batches.extra[0] > 0.0;
865 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
866 double x = batches.args[0][i];
867 // Enforce sign compatibility
868 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
869 batches.output[i] =
870 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * sinh(x * batches.args[2][i] * 0.5);
871 }
872}
873
875{
876 const bool isMinus = batches.extra[0] < 0.0;
877 const bool isPlus = batches.extra[0] > 0.0;
878 for (std::size_t i = BEGIN; i < batches.nEvents; i += STEP) {
879 double x = batches.args[0][i];
880 // Enforce sign compatibility
881 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
882 batches.output[i] =
883 isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches.args[1][i]) * cosh(x * batches.args[2][i] * .5);
884 }
885}
886
888{
889 Batch X = batches.args[0];
890 Batch M = batches.args[1];
891 Batch W = batches.args[2];
892 Batch S = batches.args[3];
893 const double invSqrt2 = 0.707106781186547524400844362105;
894 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
895 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
896 if (S[i] == 0.0 && W[i] == 0.0) {
897 batches.output[i] = 1.0;
898 } else if (S[i] == 0.0) {
899 batches.output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
900 } else if (W[i] == 0.0) {
901 batches.output[i] = fast_exp(-0.5 * arg / (S[i] * S[i]));
902 } else {
903 batches.output[i] = invSqrt2 / S[i];
904 }
905 }
906
907 for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
908 if (S[i] != 0.0 && W[i] != 0.0) {
909 if (batches.output[i] < 0)
910 batches.output[i] = -batches.output[i];
911 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
912 RooHeterogeneousMath::STD::complex<double> z(batches.output[i] * (X[i] - M[i]),
913 factor * batches.output[i] * W[i]);
914 batches.output[i] *= RooHeterogeneousMath::faddeeva(z).real();
915 }
916 }
917}
918
919/// Returns a std::vector of pointers to the compute functions in this file.
962} // End namespace RF_ARCH
963} // End namespace RooBatchCompute
#define __rooglobal__
Definition AccHeaders.h:37
#define STEP
#define BEGIN
#define c(i)
Definition RSha256.hxx:101
#define X(type, name)
#define N
float xmin
float xmax
std::size_t nEvents
Definition Batches.h:46
double *__restrict output
Definition Batches.h:49
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define G(x, y, z)
void computeLognormalStandard(Batches &batches)
void computeDeltaFunction(Batches &batches)
void computeDstD0BG(Batches &batches)
void computeBukin(Batches &batches)
void computeGamma(Batches &batches)
void computeLandau(Batches &batches)
void computeGaussian(Batches &batches)
void computeJohnson(Batches &batches)
void computeNovosibirsk(Batches &batches)
void computeLognormal(Batches &batches)
void computeArgusBG(Batches &batches)
void computeNegativeLogarithms(Batches &batches)
void computeExponential(Batches &batches)
void computeBifurGauss(Batches &batches)
void computeTruthModelExpBasis(Batches &batches)
void computeAddPdf(Batches &batches)
void computePolynomial(Batches &batches)
void computeBMixDecay(Batches &batches)
void computeTruthModelLinBasis(Batches &batches)
void computeTruthModelSinBasis(Batches &batches)
void computeVoigtian(Batches &batches)
void computeChiSquare(Batches &batches)
void computeIdentity(Batches &batches)
void computeTruthModelCosBasis(Batches &batches)
void computeGaussModelExpBasis(Batches &batches)
std::vector< void(*)(Batches &)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
void computePower(Batches &batches)
void computeChebychev(Batches &batches)
void computeBernstein(Batches &batches)
void computeProdPdf(Batches &batches)
void computeRatio(Batches &batches)
void computeTruthModelQuadBasis(Batches &batches)
void computeExponentialNeg(Batches &batches)
void computeTruthModelSinhBasis(Batches &batches)
void computeExpPoly(Batches &batches)
void computePoisson(Batches &batches)
void computeTruthModelCoshBasis(Batches &batches)
void computeBreitWigner(Batches &batches)
void computeNormalizedPdf(Batches &batches)
void computeCBShape(Batches &batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_sin(double x)
constexpr std::size_t bufferSize
__roodevice__ double fast_log(double x)
__roodevice__ double fast_cos(double x)
__roodevice__ double fast_isqrt(double x)
STD::complex< double > faddeeva(STD::complex< double > z)
STD::complex< double > evalCerf(double swt, double u, double c)
constexpr Double_t TwoPi()
Definition TMath.h:47
#define R1(v, w, x, y, z, i)
Definition sha1.inl:134
#define R2(v, w, x, y, z, i)
Definition sha1.inl:137
static double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
TMarker m
Definition textangle.C:8