Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ComputeFunctions.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, Summer 2019
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file ComputeFunctions.cxx
15\ingroup Roobatchcompute
16
17This file contains vectorizable computation functions for PDFs and other Roofit objects.
18The same source file can also be compiled with nvcc. All functions have a single `Batches`
19object as an argument passed by value, which contains all the information necessary for the
20computation. In case of cuda computations, the loops have a step (stride) the size of the grid
21which allows for reusing the same code as the cpu implementations, easier debugging and in terms
22of performance, maximum memory coalescing. For more details, see
23https://developer.nvidia.com/blog/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
24**/
25
26#include "RooBatchCompute.h"
27#include "RooNaNPacker.h"
28#include "RooVDTHeaders.h"
29#include "Batches.h"
30
31#include <TMath.h>
32
34
35#ifdef __CUDACC__
36#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
37#define STEP blockDim.x *gridDim.x
38#else
39#define BEGIN 0
40#define STEP 1
41#endif // #ifdef __CUDACC__
42
43namespace RooBatchCompute {
44namespace RF_ARCH {
45
47{
48 const int nPdfs = batches.getNExtraArgs();
49 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
50 batches._output[i] = batches.extraArg(0) * batches[0][i];
51 for (int pdf = 1; pdf < nPdfs; pdf++)
52 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
53 batches._output[i] += batches.extraArg(pdf) * batches[pdf][i];
54}
55
57{
58 Batch m = batches[0], m0 = batches[1], c = batches[2], p = batches[3];
59 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
60 const double t = m[i] / m0[i];
61 const double u = 1 - t * t;
62 batches._output[i] = c[i] * u + p[i] * fast_log(u);
63 }
64 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
65 if (m[i] >= m0[i])
66 batches._output[i] = 0.0;
67 else
68 batches._output[i] = m[i] * fast_exp(batches._output[i]);
69 }
70}
71
73{
74 Batch coef0 = batches[0];
75 Batch coef1 = batches[1];
76 Batch tagFlav = batches[2];
77 Batch delMistag = batches[3];
78 Batch mixState = batches[4];
79 Batch mistag = batches[5];
80
81 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
82 batches._output[i] =
83 coef0[i] * (1.0 - tagFlav[i] * delMistag[0]) + coef1[i] * (mixState[i] * (1.0 - 2.0 * mistag[0]));
84 }
85}
86
88{
89 const int nCoef = batches.getNExtraArgs() - 2;
90 const int degree = nCoef - 1;
91 const double xmin = batches.extraArg(nCoef);
92 const double xmax = batches.extraArg(nCoef + 1);
93 Batch xData = batches[0];
94
95 // apply binomial coefficient in-place so we don't have to allocate new memory
96 double binomial = 1.0;
97 for (int k = 0; k < nCoef; k++) {
98 batches.setExtraArg(k, batches.extraArg(k) * binomial);
99 binomial = (binomial * (degree - k)) / (k + 1);
100 }
101
102 if (STEP == 1) {
103 double X[bufferSize], _1_X[bufferSize], powX[bufferSize], pow_1_X[bufferSize];
104 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
105 powX[i] = pow_1_X[i] = 1.0;
106 X[i] = (xData[i] - xmin) / (xmax - xmin);
107 _1_X[i] = 1 - X[i];
108 batches._output[i] = 0.0;
109 }
110
111 // raising 1-x to the power of degree
112 for (int k = 2; k <= degree; k += 2)
113 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
114 pow_1_X[i] *= _1_X[i] * _1_X[i];
115
116 if (degree % 2 == 1)
117 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
118 pow_1_X[i] *= _1_X[i];
119
120 // inverting 1-x ---> 1/(1-x)
121 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
122 _1_X[i] = 1 / _1_X[i];
123
124 for (int k = 0; k < nCoef; k++)
125 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
126 batches._output[i] += batches.extraArg(k) * powX[i] * pow_1_X[i];
127
128 // calculating next power for x and 1-x
129 powX[i] *= X[i];
130 pow_1_X[i] *= _1_X[i];
131 }
132 } else
133 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
134 batches._output[i] = 0.0;
135 const double X = (xData[i] - xmin) / (xmax - xmin);
136 double powX = 1.0, pow_1_X = 1.0;
137 for (int k = 1; k <= degree; k++)
138 pow_1_X *= 1 - X;
139 const double _1_X = 1 / (1 - X);
140 for (int k = 0; k < nCoef; k++) {
141 batches._output[i] += batches.extraArg(k) * powX * pow_1_X;
142 powX *= X;
143 pow_1_X *= _1_X;
144 }
145 }
146
147 // reset extraArgs values so we don't mutate the Batches object
148 binomial = 1.0;
149 for (int k = 0; k < nCoef; k++) {
150 batches.setExtraArg(k, batches.extraArg(k) / binomial);
151 binomial = (binomial * (degree - k)) / (k + 1);
152 }
153}
154
156{
157 Batch X = batches[0], M = batches[1], SL = batches[2], SR = batches[3];
158 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
159 double arg = X[i] - M[i];
160 if (arg < 0)
161 arg /= SL[i];
162 else
163 arg /= SR[i];
164 batches._output[i] = fast_exp(-0.5 * arg * arg);
165 }
166}
167
169{
170 Batch X = batches[0], M = batches[1], W = batches[2];
171 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
172 const double arg = X[i] - M[i];
173 batches._output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
174 }
175}
176
178{
179 Batch X = batches[0], XP = batches[1], SP = batches[2], XI = batches[3], R1 = batches[4], R2 = batches[5];
180 const double r3 = log(2.0);
181 const double r6 = exp(-6.0);
182 const double r7 = 2 * sqrt(2 * log(2.0));
183
184 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
185 const double r1 = XI[i] * fast_isqrt(XI[i] * XI[i] + 1);
186 const double r4 = 1 / fast_isqrt(XI[i] * XI[i] + 1);
187 const double hp = 1 / (SP[i] * r7);
188 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
189 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
190
191 double r5 = 1.0;
192 if (XI[i] > r6 || XI[i] < -r6)
193 r5 = XI[i] / fast_log(r4 + XI[i]);
194
195 double factor = 1, y = X[i] - x1, Yp = XP[i] - x1, yi = r4 - XI[i], rho = R1[i];
196 if (X[i] >= x2) {
197 factor = -1;
198 y = X[i] - x2;
199 Yp = XP[i] - x2;
200 yi = r4 + XI[i];
201 rho = R2[i];
202 }
203
204 batches._output[i] = rho * y * y / Yp / Yp - r3 + factor * 4 * r3 * y * hp * r5 * r4 / yi / yi;
205 if (X[i] >= x1 && X[i] < x2) {
206 batches._output[i] =
207 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / fast_log(1 + 2 * XI[i] * (XI[i] - r4));
208 batches._output[i] *= -batches._output[i] * r3;
209 }
210 if (X[i] >= x1 && X[i] < x2 && XI[i] < r6 && XI[i] > -r6)
211 batches._output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
212 }
213 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
214 batches._output[i] = fast_exp(batches._output[i]);
215}
216
218{
219 Batch M = batches[0], M0 = batches[1], S = batches[2], A = batches[3], N = batches[4];
220 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
221 const double t = (M[i] - M0[i]) / S[i];
222 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i]))
223 batches._output[i] = -0.5 * t * t;
224 else {
225 batches._output[i] = N[i] / (N[i] - A[i] * A[i] - A[i] * t);
226 batches._output[i] = fast_log(batches._output[i]);
227 batches._output[i] *= N[i];
228 batches._output[i] -= 0.5 * A[i] * A[i];
229 }
230 }
231 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
232 batches._output[i] = fast_exp(batches._output[i]);
233}
234
236{
237 Batch xData = batches[0];
238 const int nCoef = batches.getNExtraArgs() - 2;
239 const double xmin = batches.extraArg(nCoef);
240 const double xmax = batches.extraArg(nCoef + 1);
241
242 if (STEP == 1) {
243 double prev[bufferSize][2], X[bufferSize];
244
245 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
246 // set a0-->prev[i][0] and a1-->prev[i][1]
247 // and x tranfsformed to range[-1..1]-->X[i]
248 prev[i][0] = batches._output[i] = 1.0;
249 prev[i][1] = X[i] = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
250 }
251 for (int k = 0; k < nCoef; k++)
252 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
253 batches._output[i] += prev[i][1] * batches.extraArg(k);
254
255 // compute next order
256 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
257 prev[i][0] = prev[i][1];
258 prev[i][1] = next;
259 }
260 } else
261 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
262 double prev0 = 1.0, prev1 = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin), X = prev1;
263 batches._output[i] = 1.0;
264 for (int k = 0; k < nCoef; k++) {
265 batches._output[i] += prev1 * batches.extraArg(k);
266
267 // compute next order
268 const double next = 2 * X * prev1 - prev0;
269 prev0 = prev1;
270 prev1 = next;
271 }
272 }
273}
274
276{
277 Batch X = batches[0];
278 const double ndof = batches.extraArg(0);
279 const double gamma = 1 / std::tgamma(ndof / 2.0);
280 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
281 batches._output[i] = gamma;
282
283 constexpr double ln2 = 0.693147180559945309417232121458;
284 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
285 double arg = (ndof - 2) * fast_log(X[i]) - X[i] - ndof * ln2;
286 batches._output[i] *= fast_exp(0.5 * arg);
287 }
288}
289
291{
292 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
293 batches._output[i] = 0.0 + (batches[0][i] == 1.0);
294 }
295}
296
298{
299 Batch DM = batches[0], DM0 = batches[1], C = batches[2], A = batches[3], B = batches[4];
300 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
301 const double ratio = DM[i] / DM0[i];
302 const double arg1 = (DM0[i] - DM[i]) / C[i];
303 const double arg2 = A[i] * fast_log(ratio);
304 batches._output[i] = (1 - fast_exp(arg1)) * fast_exp(arg2) + B[i] * (ratio - 1);
305 }
306
307 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
308 if (batches._output[i] < 0)
309 batches._output[i] = 0;
310}
311
313{
314 int lowestOrder = batches.extraArg(0);
315 int nTerms = batches.extraArg(1);
316 auto x = batches[0];
317
318 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
319 batches._output[i] = 0.0;
320 double xTmp = std::pow(x[i], lowestOrder);
321 for (int k = 0; k < nTerms; ++k) {
322 batches._output[i] += batches[k + 1][i] * xTmp;
323 xTmp *= x[i];
324 }
325 batches._output[i] = std::exp(batches._output[i]);
326 }
327}
328
330{
331 Batch x = batches[0];
332 Batch c = batches[1];
333 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
334 batches._output[i] = fast_exp(x[i] * c[i]);
335 }
336}
337
339{
340 Batch x = batches[0];
341 Batch c = batches[1];
342 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
343 batches._output[i] = fast_exp(-x[i] * c[i]);
344 }
345}
346
348{
349 Batch X = batches[0], G = batches[1], B = batches[2], M = batches[3];
350 double gamma = -std::lgamma(G[0]);
351 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
352 if (X[i] == M[i])
353 batches._output[i] = (G[i] == 1.0) / B[i];
354 else if (G.isItVector())
355 batches._output[i] = -std::lgamma(G[i]);
356 else
357 batches._output[i] = gamma;
358
359 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
360 if (X[i] != M[i]) {
361 const double invBeta = 1 / B[i];
362 double arg = (X[i] - M[i]) * invBeta;
363 batches._output[i] -= arg;
364 arg = fast_log(arg);
365 batches._output[i] += arg * (G[i] - 1);
366 batches._output[i] = fast_exp(batches._output[i]);
367 batches._output[i] *= invBeta;
368 }
369}
370
372{
373 const double root2 = std::sqrt(2.);
374 const double root2pi = std::sqrt(2. * std::atan2(0., -1.));
375
376 const bool isMinus = batches.extraArg(0) < 0.0;
377 const bool isPlus = batches.extraArg(0) > 0.0;
378
379 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
380
381 const double x = batches[0][i];
382 const double mean = batches[1][i] * batches[2][i];
383 const double sigma = batches[3][i] * batches[4][i];
384 const double tau = batches[5][i];
385
386 if (tau == 0.0) {
387 // Straight Gaussian, used for unconvoluted PDF or expBasis with 0 lifetime
388 double xprime = (x - mean) / sigma;
389 double result = std::exp(-0.5 * xprime * xprime) / (sigma * root2pi);
390 if (!isMinus && !isPlus)
391 result *= 2;
392 batches._output[i] = result;
393 } else {
394 // Convolution with exp(-t/tau)
395 const double xprime = (x - mean) / tau;
396 const double c = sigma / (root2 * tau);
397 const double u = xprime / (2 * c);
398
399 double result = 0.0;
400 if (!isMinus)
401 result += RooHeterogeneousMath::evalCerf(0, -u, c).real();
402 if (!isPlus)
403 result += RooHeterogeneousMath::evalCerf(0, u, c).real();
404 batches._output[i] = result;
405 }
406 }
407}
408
410{
411 auto x = batches[0];
412 auto mean = batches[1];
413 auto sigma = batches[2];
414 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
415 const double arg = x[i] - mean[i];
416 const double halfBySigmaSq = -0.5 / (sigma[i] * sigma[i]);
417 batches._output[i] = fast_exp(arg * arg * halfBySigmaSq);
418 }
419}
420
422{
423 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
424 batches._output[i] = batches[0][i];
425 }
426}
427
429{
430 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
431 batches._output[i] = -fast_log(batches[0][i]);
432 // Multiply by weights if they exist
433 if (batches.extraArg(0))
434 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
435 batches._output[i] *= batches[1][i];
436}
437
439{
440 Batch mass = batches[0], mu = batches[1], lambda = batches[2], gamma = batches[3], delta = batches[4];
441 const double sqrtTwoPi = std::sqrt(TMath::TwoPi());
442 const double massThreshold = batches.extraArg(0);
443
444 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
445 const double arg = (mass[i] - mu[i]) / lambda[i];
446#ifdef R__HAS_VDT
447 const double asinh_arg = fast_log(arg + 1 / fast_isqrt(arg * arg + 1));
448#else
449 const double asinh_arg = asinh(arg);
450#endif
451 const double expo = gamma[i] + delta[i] * asinh_arg;
452 const double result =
453 delta[i] * fast_exp(-0.5 * expo * expo) * fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
454
455 const double passThrough = mass[i] >= massThreshold;
456 batches._output[i] = result * passThrough;
457 }
458}
459
460/* Actual computation of Landau(x,mean,sigma) in a vectorization-friendly way
461 * Code copied from function landau_pdf (math/mathcore/src/PdfFuncMathCore.cxx)
462 * and rewritten to enable vectorization.
463 */
465{
466 auto case0 = [](double x) {
467 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
468 const double u = fast_exp(x + 1.0);
469 return 0.3989422803 * fast_exp(-1 / u - 0.5 * (x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
470 };
471 auto case1 = [](double x) {
472 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
473 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
474 const double u = fast_exp(-x - 1);
475 return fast_exp(-u - 0.5 * (x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * x) * x) * x) * x) /
476 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * x) * x) * x) * x);
477 };
478 auto case2 = [](double x) {
479 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
480 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
481 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * x) * x) * x) * x) /
482 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * x) * x) * x) * x);
483 };
484 auto case3 = [](double x) {
485 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
486 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
487 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * x) * x) * x) * x) /
488 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * x) * x) * x) * x);
489 };
490 auto case4 = [](double x) {
491 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
492 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
493 const double u = 1 / x;
494 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
495 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
496 };
497 auto case5 = [](double x) {
498 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
499 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
500 const double u = 1 / x;
501 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
502 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
503 };
504 auto case6 = [](double x) {
505 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
506 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
507 const double u = 1 / x;
508 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
509 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
510 };
511 auto case7 = [](double x) {
512 const double a2[2] = {-1.845568670, -4.284640743};
513 const double u = 1 / (x - x * fast_log(x) / (x + 1));
514 return u * u * (1 + (a2[0] + a2[1] * u) * u);
515 };
516
517 Batch X = batches[0], M = batches[1], S = batches[2];
518
519 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
520 batches._output[i] = (X[i] - M[i]) / S[i];
521
522 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
523 if (S[i] <= 0.0)
524 batches._output[i] = 0;
525 else if (batches._output[i] < -5.5)
526 batches._output[i] = case0(batches._output[i]);
527 else if (batches._output[i] < -1.0)
528 batches._output[i] = case1(batches._output[i]);
529 else if (batches._output[i] < 1.0)
530 batches._output[i] = case2(batches._output[i]);
531 else if (batches._output[i] < 5.0)
532 batches._output[i] = case3(batches._output[i]);
533 else if (batches._output[i] < 12.0)
534 batches._output[i] = case4(batches._output[i]);
535 else if (batches._output[i] < 50.0)
536 batches._output[i] = case5(batches._output[i]);
537 else if (batches._output[i] < 300.)
538 batches._output[i] = case6(batches._output[i]);
539 else
540 batches._output[i] = case7(batches._output[i]);
541}
542
544{
545 Batch X = batches[0], M0 = batches[1], K = batches[2];
546 constexpr double rootOf2pi = 2.506628274631000502415765284811;
547 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
548 double lnxOverM0 = fast_log(X[i] / M0[i]);
549 double lnk = fast_log(K[i]);
550 if (lnk < 0)
551 lnk = -lnk;
552 double arg = lnxOverM0 / lnk;
553 arg *= -0.5 * arg;
554 batches._output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
555 }
556}
557
559{
560 Batch X = batches[0], M0 = batches[1], K = batches[2];
561 constexpr double rootOf2pi = 2.506628274631000502415765284811;
562 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
563 double lnxOverM0 = fast_log(X[i]) - M0[i];
564 double lnk = K[i];
565 if (lnk < 0)
566 lnk = -lnk;
567 double arg = lnxOverM0 / lnk;
568 arg *= -0.5 * arg;
569 batches._output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
570 }
571}
572
574{
575 auto rawVal = batches[0];
576 auto normVal = batches[1];
577
578 int nEvalErrorsType0 = 0;
579 int nEvalErrorsType1 = 0;
580 int nEvalErrorsType2 = 0;
581
582 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
583 double out = 0.0;
584 // batches._output[i] = rawVal[i] / normVar[i];
585 if (normVal[i] < 0. || (normVal[i] == 0. && rawVal[i] != 0)) {
586 // Unreasonable normalisations. A zero integral can be tolerated if the function vanishes, though.
587 out = RooNaNPacker::packFloatIntoNaN(-normVal[i] + (rawVal[i] < 0. ? -rawVal[i] : 0.));
588 nEvalErrorsType0++;
589 } else if (rawVal[i] < 0.) {
590 // The pdf value is less than zero.
591 out = RooNaNPacker::packFloatIntoNaN(-rawVal[i]);
592 nEvalErrorsType1++;
593 } else if (std::isnan(rawVal[i])) {
594 // The pdf value is Not-a-Number.
595 out = rawVal[i];
596 nEvalErrorsType2++;
597 } else {
598 out = (rawVal[i] == 0. && normVal[i] == 0.) ? 0. : rawVal[i] / normVal[i];
599 }
600 batches._output[i] = out;
601 }
602
603 if (nEvalErrorsType0 > 0)
604 batches.setExtraArg(0, batches.extraArg(0) + nEvalErrorsType0);
605 if (nEvalErrorsType1 > 1)
606 batches.setExtraArg(1, batches.extraArg(1) + nEvalErrorsType1);
607 if (nEvalErrorsType2 > 2)
608 batches.setExtraArg(2, batches.extraArg(2) + nEvalErrorsType2);
609}
610
611/* TMath::ASinH(x) needs to be replaced with ln( x + sqrt(x^2+1))
612 * argasinh -> the argument of TMath::ASinH()
613 * argln -> the argument of the logarithm that replaces AsinH
614 * asinh -> the value that the function evaluates to
615 *
616 * ln is the logarithm that was solely present in the initial
617 * formula, that is before the asinh replacement
618 */
620{
621 Batch X = batches[0], P = batches[1], W = batches[2], T = batches[3];
622 constexpr double xi = 2.3548200450309494; // 2 Sqrt( Ln(4) )
623 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
624 double argasinh = 0.5 * xi * T[i];
625 double argln = argasinh + 1 / fast_isqrt(argasinh * argasinh + 1);
626 double asinh = fast_log(argln);
627
628 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
629 double ln = fast_log(argln2);
630 batches._output[i] = ln / asinh;
631 batches._output[i] *= -0.125 * xi * xi * batches._output[i];
632 batches._output[i] -= 2.0 / xi / xi * asinh * asinh;
633 }
634
635 // faster if you exponentiate in a separate loop (dark magic!)
636 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
637 batches._output[i] = fast_exp(batches._output[i]);
638}
639
641{
642 Batch x = batches[0], mean = batches[1];
643 bool protectNegative = batches.extraArg(0);
644 bool noRounding = batches.extraArg(1);
645 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
646 const double x_i = noRounding ? x[i] : floor(x[i]);
647 batches._output[i] = std::lgamma(x_i + 1.);
648 }
649
650 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
651 const double x_i = noRounding ? x[i] : floor(x[i]);
652 const double logMean = fast_log(mean[i]);
653 const double logPoisson = x_i * logMean - mean[i] - batches._output[i];
654 batches._output[i] = fast_exp(logPoisson);
655
656 // Cosmetics
657 if (x_i < 0)
658 batches._output[i] = 0;
659 else if (x_i == 0)
660 batches._output[i] = 1 / fast_exp(mean[i]);
661
662 if (protectNegative && mean[i] < 0)
663 batches._output[i] = 1.E-3;
664 }
665}
666
668{
669 const int nCoef = batches.extraArg(0);
670 const std::size_t nEvents = batches.getNEvents();
671 Batch x = batches[nCoef];
672
673 for (size_t i = BEGIN; i < nEvents; i += STEP) {
674 batches._output[i] = batches[nCoef - 1][i];
675 }
676
677 // Indexes are in range 0..nCoef-1 but coefList[nCoef-1] has already been
678 // processed.
679 for (int k = nCoef - 2; k >= 0; k--) {
680 for (size_t i = BEGIN; i < nEvents; i += STEP) {
681 batches._output[i] = batches[k][i] + x[i] * batches._output[i];
682 }
683 }
684}
685
687{
688 const int nCoef = batches.extraArg(0);
689 Batch x = batches[0];
690
691 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
692 batches._output[i] = 0.0;
693 for (int k = 0; k < nCoef; ++k) {
694 batches._output[i] += batches[2 * k + 1][i] * std::pow(x[i], batches[2 * k + 2][i]);
695 }
696 }
697}
698
700{
701 const int nPdfs = batches.extraArg(0);
702 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
703 batches._output[i] = 1.;
704 }
705 for (int pdf = 0; pdf < nPdfs; pdf++) {
706 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
707 batches._output[i] *= batches[pdf][i];
708 }
709 }
710}
711
713{
714 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
715 batches._output[i] = batches[0][i] / batches[1][i];
716 }
717}
718
720{
721
722 const bool isMinus = batches.extraArg(0) < 0.0;
723 const bool isPlus = batches.extraArg(0) > 0.0;
724 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
725 double x = batches[0][i];
726 // Enforce sign compatibility
727 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
728 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]);
729 }
730}
731
733{
734 const bool isMinus = batches.extraArg(0) < 0.0;
735 const bool isPlus = batches.extraArg(0) > 0.0;
736 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
737 double x = batches[0][i];
738 // Enforce sign compatibility
739 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
740 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * fast_sin(x * batches[2][i]);
741 }
742}
743
745{
746 const bool isMinus = batches.extraArg(0) < 0.0;
747 const bool isPlus = batches.extraArg(0) > 0.0;
748 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
749 double x = batches[0][i];
750 // Enforce sign compatibility
751 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
752 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * fast_cos(x * batches[2][i]);
753 }
754}
755
757{
758 const bool isMinus = batches.extraArg(0) < 0.0;
759 const bool isPlus = batches.extraArg(0) > 0.0;
760 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
761 double x = batches[0][i];
762 // Enforce sign compatibility
763 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
764 if (isOutOfSign) {
765 batches._output[i] = 0.0;
766 } else {
767 const double tscaled = std::abs(x) / batches[1][i];
768 batches._output[i] = fast_exp(-tscaled) * tscaled;
769 }
770 }
771}
772
774{
775 const bool isMinus = batches.extraArg(0) < 0.0;
776 const bool isPlus = batches.extraArg(0) > 0.0;
777 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
778 double x = batches[0][i];
779 // Enforce sign compatibility
780 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
781 if (isOutOfSign) {
782 batches._output[i] = 0.0;
783 } else {
784 const double tscaled = std::abs(x) / batches[1][i];
785 batches._output[i] = fast_exp(-tscaled) * tscaled * tscaled;
786 }
787 }
788}
789
791{
792 const bool isMinus = batches.extraArg(0) < 0.0;
793 const bool isPlus = batches.extraArg(0) > 0.0;
794 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
795 double x = batches[0][i];
796 // Enforce sign compatibility
797 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
798 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * sinh(x * batches[2][i] * 0.5);
799 }
800}
801
803{
804 const bool isMinus = batches.extraArg(0) < 0.0;
805 const bool isPlus = batches.extraArg(0) > 0.0;
806 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
807 double x = batches[0][i];
808 // Enforce sign compatibility
809 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
810 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * cosh(x * batches[2][i] * .5);
811 }
812}
813
815{
816 Batch X = batches[0], M = batches[1], W = batches[2], S = batches[3];
817 const double invSqrt2 = 0.707106781186547524400844362105;
818 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
819 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
820 if (S[i] == 0.0 && W[i] == 0.0)
821 batches._output[i] = 1.0;
822 else if (S[i] == 0.0)
823 batches._output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
824 else if (W[i] == 0.0)
825 batches._output[i] = fast_exp(-0.5 * arg / (S[i] * S[i]));
826 else
827 batches._output[i] = invSqrt2 / S[i];
828 }
829
830 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
831 if (S[i] != 0.0 && W[i] != 0.0) {
832 if (batches._output[i] < 0)
833 batches._output[i] = -batches._output[i];
834 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
835 RooHeterogeneousMath::STD::complex<double> z(batches._output[i] * (X[i] - M[i]),
836 factor * batches._output[i] * W[i]);
837 batches._output[i] *= RooHeterogeneousMath::faddeeva(z).real();
838 }
839 }
840}
841
842/// Returns a std::vector of pointers to the compute functions in this file.
843std::vector<void (*)(BatchesHandle)> getFunctions()
844{
845 return {computeAddPdf,
884}
885} // End namespace RF_ARCH
886} // End namespace RooBatchCompute
#define STEP
#define BEGIN
#define c(i)
Definition RSha256.hxx:101
#define __rooglobal__
#define X(type, name)
#define N
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
float xmin
float xmax
__roodevice__ std::size_t getNEvents() const
Definition Batches.h:69
__roodevice__ std::size_t getNExtraArgs() const
Definition Batches.h:70
__roodevice__ void setExtraArg(std::size_t i, double val)
Definition Batches.h:72
__roodevice__ double extraArg(std::size_t i) const
Definition Batches.h:71
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define G(x, y, z)
__rooglobal__ void computeTruthModelCosBasis(BatchesHandle batches)
__rooglobal__ void computeExponential(BatchesHandle batches)
__rooglobal__ void computeDstD0BG(BatchesHandle batches)
__rooglobal__ void computeLandau(BatchesHandle batches)
__rooglobal__ void computeArgusBG(BatchesHandle batches)
__rooglobal__ void computeBukin(BatchesHandle batches)
__rooglobal__ void computeLognormalStandard(BatchesHandle batches)
__rooglobal__ void computeNovosibirsk(BatchesHandle batches)
__rooglobal__ void computeNormalizedPdf(BatchesHandle batches)
__rooglobal__ void computePolynomial(BatchesHandle batches)
__rooglobal__ void computeTruthModelSinhBasis(BatchesHandle batches)
__rooglobal__ void computePoisson(BatchesHandle batches)
__rooglobal__ void computeTruthModelCoshBasis(BatchesHandle batches)
__rooglobal__ void computeBifurGauss(BatchesHandle batches)
__rooglobal__ void computeTruthModelSinBasis(BatchesHandle batches)
__rooglobal__ void computeExpPoly(BatchesHandle batches)
__rooglobal__ void computeRatio(BatchesHandle batches)
__rooglobal__ void computeAddPdf(BatchesHandle batches)
__rooglobal__ void computeTruthModelLinBasis(BatchesHandle batches)
__rooglobal__ void computeChiSquare(BatchesHandle batches)
__rooglobal__ void computeTruthModelQuadBasis(BatchesHandle batches)
__rooglobal__ void computeDeltaFunction(BatchesHandle batches)
__rooglobal__ void computeChebychev(BatchesHandle batches)
__rooglobal__ void computeExponentialNeg(BatchesHandle batches)
__rooglobal__ void computeIdentity(BatchesHandle batches)
__rooglobal__ void computeLognormal(BatchesHandle batches)
__rooglobal__ void computeGaussModelExpBasis(BatchesHandle batches)
__rooglobal__ void computeVoigtian(BatchesHandle batches)
__rooglobal__ void computePower(BatchesHandle batches)
__rooglobal__ void computeTruthModelExpBasis(BatchesHandle batches)
__rooglobal__ void computeGaussian(BatchesHandle batches)
__rooglobal__ void computeBernstein(BatchesHandle batches)
__rooglobal__ void computeNegativeLogarithms(BatchesHandle batches)
__rooglobal__ void computeGamma(BatchesHandle batches)
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Batches & BatchesHandle
Definition Batches.h:84
__rooglobal__ void computeJohnson(BatchesHandle batches)
__rooglobal__ void computeBreitWigner(BatchesHandle batches)
__rooglobal__ void computeProdPdf(BatchesHandle batches)
__rooglobal__ void computeCBShape(BatchesHandle batches)
__rooglobal__ void computeBMixDecay(BatchesHandle batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_sin(double x)
constexpr std::size_t bufferSize
Definition Batches.h:32
__roodevice__ double fast_log(double x)
__roodevice__ double fast_cos(double x)
__roodevice__ double fast_isqrt(double x)
__roodevice__ __roohost__ STD::complex< double > faddeeva(STD::complex< double > z)
__roohost__ __roodevice__ STD::complex< double > evalCerf(double swt, double u, double c)
constexpr Double_t TwoPi()
Definition TMath.h:44
#define R1(v, w, x, y, z, i)
Definition sha1.inl:134
#define R2(v, w, x, y, z, i)
Definition sha1.inl:137
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
TMarker m
Definition textangle.C:8