Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ComputeFunctions.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, Summer 2019
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file ComputeFunctions.cxx
15\ingroup Roobatchcompute
16
17This file contains vectorizable computation functions for PDFs and other Roofit objects.
18The same source file can also be compiled with nvcc. All functions have a single `Batches`
19object as an argument passed by value, which contains all the information necessary for the
20computation. In case of cuda computations, the loops have a step (stride) the size of the grid
21which allows for reusing the same code as the cpu implementations, easier debugging and in terms
22of performance, maximum memory coalescing. For more details, see
23https://developer.nvidia.com/blog/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
24**/
25
26#include "RooBatchCompute.h"
27#include "RooNaNPacker.h"
28#include "RooVDTHeaders.h"
29#include "Batches.h"
30
31#include <TMath.h>
32
34
35#ifdef __CUDACC__
36#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
37#define STEP blockDim.x *gridDim.x
38#else
39#define BEGIN 0
40#define STEP 1
41#endif // #ifdef __CUDACC__
42
43namespace RooBatchCompute {
44namespace RF_ARCH {
45
47{
48 const int nPdfs = batches.getNExtraArgs();
49 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
50 batches._output[i] = batches.extraArg(0) * batches[0][i];
51 for (int pdf = 1; pdf < nPdfs; pdf++)
52 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
53 batches._output[i] += batches.extraArg(pdf) * batches[pdf][i];
54}
55
57{
58 Batch m = batches[0], m0 = batches[1], c = batches[2], p = batches[3];
59 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
60 const double t = m[i] / m0[i];
61 const double u = 1 - t * t;
62 batches._output[i] = c[i] * u + p[i] * fast_log(u);
63 }
64 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
65 if (m[i] >= m0[i])
66 batches._output[i] = 0.0;
67 else
68 batches._output[i] = m[i] * fast_exp(batches._output[i]);
69 }
70}
71
73{
74 Batch coef0 = batches[0];
75 Batch coef1 = batches[1];
76 Batch tagFlav = batches[2];
77 Batch delMistag = batches[3];
78 Batch mixState = batches[4];
79 Batch mistag = batches[5];
80
81 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
82 batches._output[i] =
83 coef0[i] * (1.0 - tagFlav[i] * delMistag[0]) + coef1[i] * (mixState[i] * (1.0 - 2.0 * mistag[0]));
84 }
85}
86
88{
89 const int nCoef = batches.getNExtraArgs() - 2;
90 const int degree = nCoef - 1;
91 const double xmin = batches.extraArg(nCoef);
92 const double xmax = batches.extraArg(nCoef + 1);
93 Batch xData = batches[0];
94
95 // apply binomial coefficient in-place so we don't have to allocate new memory
96 double binomial = 1.0;
97 for (int k = 0; k < nCoef; k++) {
98 batches.setExtraArg(k, batches.extraArg(k) * binomial);
99 binomial = (binomial * (degree - k)) / (k + 1);
100 }
101
102 if (STEP == 1) {
103 double X[bufferSize], _1_X[bufferSize], powX[bufferSize], pow_1_X[bufferSize];
104 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
105 powX[i] = pow_1_X[i] = 1.0;
106 X[i] = (xData[i] - xmin) / (xmax - xmin);
107 _1_X[i] = 1 - X[i];
108 batches._output[i] = 0.0;
109 }
110
111 // raising 1-x to the power of degree
112 for (int k = 2; k <= degree; k += 2)
113 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
114 pow_1_X[i] *= _1_X[i] * _1_X[i];
115
116 if (degree % 2 == 1)
117 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
118 pow_1_X[i] *= _1_X[i];
119
120 // inverting 1-x ---> 1/(1-x)
121 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
122 _1_X[i] = 1 / _1_X[i];
123
124 for (int k = 0; k < nCoef; k++)
125 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
126 batches._output[i] += batches.extraArg(k) * powX[i] * pow_1_X[i];
127
128 // calculating next power for x and 1-x
129 powX[i] *= X[i];
130 pow_1_X[i] *= _1_X[i];
131 }
132 } else
133 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
134 batches._output[i] = 0.0;
135 const double X = (xData[i] - xmin) / (xmax - xmin);
136 double powX = 1.0, pow_1_X = 1.0;
137 for (int k = 1; k <= degree; k++)
138 pow_1_X *= 1 - X;
139 const double _1_X = 1 / (1 - X);
140 for (int k = 0; k < nCoef; k++) {
141 batches._output[i] += batches.extraArg(k) * powX * pow_1_X;
142 powX *= X;
143 pow_1_X *= _1_X;
144 }
145 }
146
147 // reset extraArgs values so we don't mutate the Batches object
148 binomial = 1.0;
149 for (int k = 0; k < nCoef; k++) {
150 batches.setExtraArg(k, batches.extraArg(k) / binomial);
151 binomial = (binomial * (degree - k)) / (k + 1);
152 }
153}
154
156{
157 Batch X = batches[0], M = batches[1], SL = batches[2], SR = batches[3];
158 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
159 double arg = X[i] - M[i];
160 if (arg < 0)
161 arg /= SL[i];
162 else
163 arg /= SR[i];
164 batches._output[i] = fast_exp(-0.5 * arg * arg);
165 }
166}
167
169{
170 Batch X = batches[0], M = batches[1], W = batches[2];
171 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
172 const double arg = X[i] - M[i];
173 batches._output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
174 }
175}
176
178{
179 Batch X = batches[0], XP = batches[1], SP = batches[2], XI = batches[3], R1 = batches[4], R2 = batches[5];
180 const double r3 = log(2.0);
181 const double r6 = exp(-6.0);
182 const double r7 = 2 * sqrt(2 * log(2.0));
183
184 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
185 const double r1 = XI[i] * fast_isqrt(XI[i] * XI[i] + 1);
186 const double r4 = 1 / fast_isqrt(XI[i] * XI[i] + 1);
187 const double hp = 1 / (SP[i] * r7);
188 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
189 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
190
191 double r5 = 1.0;
192 if (XI[i] > r6 || XI[i] < -r6)
193 r5 = XI[i] / fast_log(r4 + XI[i]);
194
195 double factor = 1, y = X[i] - x1, Yp = XP[i] - x1, yi = r4 - XI[i], rho = R1[i];
196 if (X[i] >= x2) {
197 factor = -1;
198 y = X[i] - x2;
199 Yp = XP[i] - x2;
200 yi = r4 + XI[i];
201 rho = R2[i];
202 }
203
204 batches._output[i] = rho * y * y / Yp / Yp - r3 + factor * 4 * r3 * y * hp * r5 * r4 / yi / yi;
205 if (X[i] >= x1 && X[i] < x2) {
206 batches._output[i] =
207 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / fast_log(1 + 2 * XI[i] * (XI[i] - r4));
208 batches._output[i] *= -batches._output[i] * r3;
209 }
210 if (X[i] >= x1 && X[i] < x2 && XI[i] < r6 && XI[i] > -r6)
211 batches._output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
212 }
213 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
214 batches._output[i] = fast_exp(batches._output[i]);
215}
216
218{
219 Batch M = batches[0], M0 = batches[1], S = batches[2], A = batches[3], N = batches[4];
220 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
221 const double t = (M[i] - M0[i]) / S[i];
222 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i]))
223 batches._output[i] = -0.5 * t * t;
224 else {
225 batches._output[i] = N[i] / (N[i] - A[i] * A[i] - A[i] * t);
226 batches._output[i] = fast_log(batches._output[i]);
227 batches._output[i] *= N[i];
228 batches._output[i] -= 0.5 * A[i] * A[i];
229 }
230 }
231 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
232 batches._output[i] = fast_exp(batches._output[i]);
233}
234
236{
237 Batch xData = batches[0];
238 const int nCoef = batches.getNExtraArgs() - 2;
239 const double xmin = batches.extraArg(nCoef);
240 const double xmax = batches.extraArg(nCoef + 1);
241
242 if (STEP == 1) {
243 double prev[bufferSize][2], X[bufferSize];
244
245 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
246 // set a0-->prev[i][0] and a1-->prev[i][1]
247 // and x tranfsformed to range[-1..1]-->X[i]
248 prev[i][0] = batches._output[i] = 1.0;
249 prev[i][1] = X[i] = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
250 }
251 for (int k = 0; k < nCoef; k++)
252 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
253 batches._output[i] += prev[i][1] * batches.extraArg(k);
254
255 // compute next order
256 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
257 prev[i][0] = prev[i][1];
258 prev[i][1] = next;
259 }
260 } else
261 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
262 double prev0 = 1.0, prev1 = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin), X = prev1;
263 batches._output[i] = 1.0;
264 for (int k = 0; k < nCoef; k++) {
265 batches._output[i] += prev1 * batches.extraArg(k);
266
267 // compute next order
268 const double next = 2 * X * prev1 - prev0;
269 prev0 = prev1;
270 prev1 = next;
271 }
272 }
273}
274
276{
277 Batch X = batches[0];
278 const double ndof = batches.extraArg(0);
279 const double gamma = 1 / std::tgamma(ndof / 2.0);
280 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
281 batches._output[i] = gamma;
282
283 constexpr double ln2 = 0.693147180559945309417232121458;
284 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
285 double arg = (ndof - 2) * fast_log(X[i]) - X[i] - ndof * ln2;
286 batches._output[i] *= fast_exp(0.5 * arg);
287 }
288}
289
291{
292 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
293 batches._output[i] = 0.0 + (batches[0][i] == 1.0);
294 }
295}
296
298{
299 Batch DM = batches[0], DM0 = batches[1], C = batches[2], A = batches[3], B = batches[4];
300 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
301 const double ratio = DM[i] / DM0[i];
302 const double arg1 = (DM0[i] - DM[i]) / C[i];
303 const double arg2 = A[i] * fast_log(ratio);
304 batches._output[i] = (1 - fast_exp(arg1)) * fast_exp(arg2) + B[i] * (ratio - 1);
305 }
306
307 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
308 if (batches._output[i] < 0)
309 batches._output[i] = 0;
310}
311
313{
314 Batch x = batches[0], c = batches[1];
315 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
316 batches._output[i] = fast_exp(x[i] * c[i]);
317}
318
320{
321 Batch X = batches[0], G = batches[1], B = batches[2], M = batches[3];
322 double gamma = -std::lgamma(G[0]);
323 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
324 if (X[i] == M[i])
325 batches._output[i] = (G[i] == 1.0) / B[i];
326 else if (G.isItVector())
327 batches._output[i] = -std::lgamma(G[i]);
328 else
329 batches._output[i] = gamma;
330
331 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
332 if (X[i] != M[i]) {
333 const double invBeta = 1 / B[i];
334 double arg = (X[i] - M[i]) * invBeta;
335 batches._output[i] -= arg;
336 arg = fast_log(arg);
337 batches._output[i] += arg * (G[i] - 1);
338 batches._output[i] = fast_exp(batches._output[i]);
339 batches._output[i] *= invBeta;
340 }
341}
342
344{
345 const double root2 = std::sqrt(2.);
346 const double root2pi = std::sqrt(2. * std::atan2(0., -1.));
347
348 const bool isMinus = batches.extraArg(0) < 0.0;
349 const bool isPlus = batches.extraArg(0) > 0.0;
350
351 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
352
353 const double x = batches[0][i];
354 const double mean = batches[1][i] * batches[2][i];
355 const double sigma = batches[3][i] * batches[4][i];
356 const double tau = batches[5][i];
357
358 if (tau == 0.0) {
359 // Straight Gaussian, used for unconvoluted PDF or expBasis with 0 lifetime
360 double xprime = (x - mean) / sigma;
361 double result = std::exp(-0.5 * xprime * xprime) / (sigma * root2pi);
362 if (!isMinus && !isPlus)
363 result *= 2;
364 batches._output[i] = result;
365 } else {
366 // Convolution with exp(-t/tau)
367 const double xprime = (x - mean) / tau;
368 const double c = sigma / (root2 * tau);
369 const double u = xprime / (2 * c);
370
371 double result = 0.0;
372 if (!isMinus)
373 result += RooHeterogeneousMath::evalCerf(0, -u, c).real();
374 if (!isPlus)
375 result += RooHeterogeneousMath::evalCerf(0, u, c).real();
376 batches._output[i] = result;
377 }
378 }
379}
380
382{
383 auto x = batches[0];
384 auto mean = batches[1];
385 auto sigma = batches[2];
386 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
387 const double arg = x[i] - mean[i];
388 const double halfBySigmaSq = -0.5 / (sigma[i] * sigma[i]);
389 batches._output[i] = fast_exp(arg * arg * halfBySigmaSq);
390 }
391}
392
394{
395 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
396 batches._output[i] = batches[0][i];
397 }
398}
399
401{
402 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
403 batches._output[i] = -fast_log(batches[0][i]);
404 // Multiply by weights if they exist
405 if (batches.extraArg(0))
406 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
407 batches._output[i] *= batches[1][i];
408}
409
411{
412 Batch mass = batches[0], mu = batches[1], lambda = batches[2], gamma = batches[3], delta = batches[4];
413 const double sqrtTwoPi = std::sqrt(TMath::TwoPi());
414 const double massThreshold = batches.extraArg(0);
415
416 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
417 const double arg = (mass[i] - mu[i]) / lambda[i];
418#ifdef R__HAS_VDT
419 const double asinh_arg = fast_log(arg + 1 / fast_isqrt(arg * arg + 1));
420#else
421 const double asinh_arg = asinh(arg);
422#endif
423 const double expo = gamma[i] + delta[i] * asinh_arg;
424 const double result =
425 delta[i] * fast_exp(-0.5 * expo * expo) * fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
426
427 const double passThrough = mass[i] >= massThreshold;
428 batches._output[i] = result * passThrough;
429 }
430}
431
432/* Actual computation of Landau(x,mean,sigma) in a vectorization-friendly way
433 * Code copied from function landau_pdf (math/mathcore/src/PdfFuncMathCore.cxx)
434 * and rewritten to enable vectorization.
435 */
437{
438 auto case0 = [](double x) {
439 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
440 const double u = fast_exp(x + 1.0);
441 return 0.3989422803 * fast_exp(-1 / u - 0.5 * (x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
442 };
443 auto case1 = [](double x) {
444 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
445 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
446 const double u = fast_exp(-x - 1);
447 return fast_exp(-u - 0.5 * (x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * x) * x) * x) * x) /
448 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * x) * x) * x) * x);
449 };
450 auto case2 = [](double x) {
451 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
452 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
453 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * x) * x) * x) * x) /
454 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * x) * x) * x) * x);
455 };
456 auto case3 = [](double x) {
457 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
458 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
459 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * x) * x) * x) * x) /
460 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * x) * x) * x) * x);
461 };
462 auto case4 = [](double x) {
463 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
464 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
465 const double u = 1 / x;
466 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
467 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
468 };
469 auto case5 = [](double x) {
470 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
471 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
472 const double u = 1 / x;
473 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
474 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
475 };
476 auto case6 = [](double x) {
477 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
478 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
479 const double u = 1 / x;
480 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
481 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
482 };
483 auto case7 = [](double x) {
484 const double a2[2] = {-1.845568670, -4.284640743};
485 const double u = 1 / (x - x * fast_log(x) / (x + 1));
486 return u * u * (1 + (a2[0] + a2[1] * u) * u);
487 };
488
489 Batch X = batches[0], M = batches[1], S = batches[2];
490
491 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
492 batches._output[i] = (X[i] - M[i]) / S[i];
493
494 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
495 if (S[i] <= 0.0)
496 batches._output[i] = 0;
497 else if (batches._output[i] < -5.5)
498 batches._output[i] = case0(batches._output[i]);
499 else if (batches._output[i] < -1.0)
500 batches._output[i] = case1(batches._output[i]);
501 else if (batches._output[i] < 1.0)
502 batches._output[i] = case2(batches._output[i]);
503 else if (batches._output[i] < 5.0)
504 batches._output[i] = case3(batches._output[i]);
505 else if (batches._output[i] < 12.0)
506 batches._output[i] = case4(batches._output[i]);
507 else if (batches._output[i] < 50.0)
508 batches._output[i] = case5(batches._output[i]);
509 else if (batches._output[i] < 300.)
510 batches._output[i] = case6(batches._output[i]);
511 else
512 batches._output[i] = case7(batches._output[i]);
513}
514
516{
517 Batch X = batches[0], M0 = batches[1], K = batches[2];
518 const double rootOf2pi = 2.506628274631000502415765284811;
519 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
520 double lnxOverM0 = fast_log(X[i] / M0[i]);
521 double lnk = fast_log(K[i]);
522 if (lnk < 0)
523 lnk = -lnk;
524 double arg = lnxOverM0 / lnk;
525 arg *= -0.5 * arg;
526 batches._output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
527 }
528}
529
531{
532 auto rawVal = batches[0];
533 auto normVal = batches[1];
534
535 int nEvalErrorsType0 = 0;
536 int nEvalErrorsType1 = 0;
537 int nEvalErrorsType2 = 0;
538
539 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
540 double out = 0.0;
541 // batches._output[i] = rawVal[i] / normVar[i];
542 if (normVal[i] < 0. || (normVal[i] == 0. && rawVal[i] != 0)) {
543 // Unreasonable normalisations. A zero integral can be tolerated if the function vanishes, though.
544 out = RooNaNPacker::packFloatIntoNaN(-normVal[i] + (rawVal[i] < 0. ? -rawVal[i] : 0.));
545 nEvalErrorsType0++;
546 } else if (rawVal[i] < 0.) {
547 // The pdf value is less than zero.
548 out = RooNaNPacker::packFloatIntoNaN(-rawVal[i]);
549 nEvalErrorsType1++;
550 } else if (std::isnan(rawVal[i])) {
551 // The pdf value is Not-a-Number.
552 out = rawVal[i];
553 nEvalErrorsType2++;
554 } else {
555 out = (rawVal[i] == 0. && normVal[i] == 0.) ? 0. : rawVal[i] / normVal[i];
556 }
557 batches._output[i] = out;
558 }
559
560 if (nEvalErrorsType0 > 0)
561 batches.setExtraArg(0, batches.extraArg(0) + nEvalErrorsType0);
562 if (nEvalErrorsType1 > 1)
563 batches.setExtraArg(1, batches.extraArg(1) + nEvalErrorsType1);
564 if (nEvalErrorsType2 > 2)
565 batches.setExtraArg(2, batches.extraArg(2) + nEvalErrorsType2);
566}
567
568/* TMath::ASinH(x) needs to be replaced with ln( x + sqrt(x^2+1))
569 * argasinh -> the argument of TMath::ASinH()
570 * argln -> the argument of the logarithm that replaces AsinH
571 * asinh -> the value that the function evaluates to
572 *
573 * ln is the logarithm that was solely present in the initial
574 * formula, that is before the asinh replacement
575 */
577{
578 Batch X = batches[0], P = batches[1], W = batches[2], T = batches[3];
579 constexpr double xi = 2.3548200450309494; // 2 Sqrt( Ln(4) )
580 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
581 double argasinh = 0.5 * xi * T[i];
582 double argln = argasinh + 1 / fast_isqrt(argasinh * argasinh + 1);
583 double asinh = fast_log(argln);
584
585 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
586 double ln = fast_log(argln2);
587 batches._output[i] = ln / asinh;
588 batches._output[i] *= -0.125 * xi * xi * batches._output[i];
589 batches._output[i] -= 2.0 / xi / xi * asinh * asinh;
590 }
591
592 // faster if you exponentiate in a seperate loop (dark magic!)
593 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
594 batches._output[i] = fast_exp(batches._output[i]);
595}
596
598{
599 Batch x = batches[0], mean = batches[1];
600 bool protectNegative = batches.extraArg(0);
601 bool noRounding = batches.extraArg(1);
602 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
603 const double x_i = noRounding ? x[i] : floor(x[i]);
604 batches._output[i] = std::lgamma(x_i + 1.);
605 }
606
607 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
608 const double x_i = noRounding ? x[i] : floor(x[i]);
609 const double logMean = fast_log(mean[i]);
610 const double logPoisson = x_i * logMean - mean[i] - batches._output[i];
611 batches._output[i] = fast_exp(logPoisson);
612
613 // Cosmetics
614 if (x_i < 0)
615 batches._output[i] = 0;
616 else if (x_i == 0)
617 batches._output[i] = 1 / fast_exp(mean[i]);
618
619 if (protectNegative && mean[i] < 0)
620 batches._output[i] = 1.E-3;
621 }
622}
623
625{
626 const int nCoef = batches.extraArg(0);
627 const std::size_t nEvents = batches.getNEvents();
628 Batch x = batches[nCoef];
629
630 for (size_t i = BEGIN; i < nEvents; i += STEP) {
631 batches._output[i] = batches[nCoef - 1][i];
632 }
633
634 // Indexes are in range 0..nCoef-1 but coefList[nCoef-1] has already been
635 // processed.
636 for (int k = nCoef - 2; k >= 0; k--) {
637 for (size_t i = BEGIN; i < nEvents; i += STEP) {
638 batches._output[i] = batches[k][i] + x[i] * batches._output[i];
639 }
640 }
641}
642
644{
645 const int nPdfs = batches.extraArg(0);
646 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
647 batches._output[i] = 1.;
648 }
649 for (int pdf = 0; pdf < nPdfs; pdf++) {
650 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
651 batches._output[i] *= batches[pdf][i];
652 }
653 }
654}
655
657{
658 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
659 batches._output[i] = batches[0][i] / batches[1][i];
660 }
661}
662
664{
665
666 const bool isMinus = batches.extraArg(0) < 0.0;
667 const bool isPlus = batches.extraArg(0) > 0.0;
668 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
669 double x = batches[0][i];
670 // Enforce sign compatibility
671 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
672 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]);
673 }
674}
675
677{
678 const bool isMinus = batches.extraArg(0) < 0.0;
679 const bool isPlus = batches.extraArg(0) > 0.0;
680 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
681 double x = batches[0][i];
682 // Enforce sign compatibility
683 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
684 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * fast_sin(x * batches[2][i]);
685 }
686}
687
689{
690 const bool isMinus = batches.extraArg(0) < 0.0;
691 const bool isPlus = batches.extraArg(0) > 0.0;
692 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
693 double x = batches[0][i];
694 // Enforce sign compatibility
695 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
696 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * fast_cos(x * batches[2][i]);
697 }
698}
699
701{
702 const bool isMinus = batches.extraArg(0) < 0.0;
703 const bool isPlus = batches.extraArg(0) > 0.0;
704 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
705 double x = batches[0][i];
706 // Enforce sign compatibility
707 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
708 if (isOutOfSign) {
709 batches._output[i] = 0.0;
710 } else {
711 const double tscaled = std::abs(x) / batches[1][i];
712 batches._output[i] = fast_exp(-tscaled) * tscaled;
713 }
714 }
715}
716
718{
719 const bool isMinus = batches.extraArg(0) < 0.0;
720 const bool isPlus = batches.extraArg(0) > 0.0;
721 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
722 double x = batches[0][i];
723 // Enforce sign compatibility
724 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
725 if (isOutOfSign) {
726 batches._output[i] = 0.0;
727 } else {
728 const double tscaled = std::abs(x) / batches[1][i];
729 batches._output[i] = fast_exp(-tscaled) * tscaled * tscaled;
730 }
731 }
732}
733
735{
736 const bool isMinus = batches.extraArg(0) < 0.0;
737 const bool isPlus = batches.extraArg(0) > 0.0;
738 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
739 double x = batches[0][i];
740 // Enforce sign compatibility
741 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
742 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * sinh(x * batches[2][i] * 0.5);
743 }
744}
745
747{
748 const bool isMinus = batches.extraArg(0) < 0.0;
749 const bool isPlus = batches.extraArg(0) > 0.0;
750 for (std::size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
751 double x = batches[0][i];
752 // Enforce sign compatibility
753 const bool isOutOfSign = (isMinus && x > 0.0) || (isPlus && x < 0.0);
754 batches._output[i] = isOutOfSign ? 0.0 : fast_exp(-std::abs(x) / batches[1][i]) * cosh(x * batches[2][i] * .5);
755 }
756}
757
759{
760 Batch X = batches[0], M = batches[1], W = batches[2], S = batches[3];
761 const double invSqrt2 = 0.707106781186547524400844362105;
762 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
763 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
764 if (S[i] == 0.0 && W[i] == 0.0)
765 batches._output[i] = 1.0;
766 else if (S[i] == 0.0)
767 batches._output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
768 else if (W[i] == 0.0)
769 batches._output[i] = fast_exp(-0.5 * arg / (S[i] * S[i]));
770 else
771 batches._output[i] = invSqrt2 / S[i];
772 }
773
774 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
775 if (S[i] != 0.0 && W[i] != 0.0) {
776 if (batches._output[i] < 0)
777 batches._output[i] = -batches._output[i];
778 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
779 RooHeterogeneousMath::STD::complex<double> z(batches._output[i] * (X[i] - M[i]),
780 factor * batches._output[i] * W[i]);
781 batches._output[i] *= RooHeterogeneousMath::faddeeva(z).real();
782 }
783 }
784}
785
786/// Returns a std::vector of pointers to the compute functions in this file.
787std::vector<void (*)(BatchesHandle)> getFunctions()
788{
789 return {computeAddPdf,
824}
825} // End namespace RF_ARCH
826} // End namespace RooBatchCompute
#define STEP
#define BEGIN
#define c(i)
Definition RSha256.hxx:101
#define __rooglobal__
#define N
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
float xmin
float xmax
__roodevice__ std::size_t getNEvents() const
Definition Batches.h:99
__roodevice__ std::size_t getNExtraArgs() const
Definition Batches.h:100
__roodevice__ void setExtraArg(std::size_t i, double val)
Definition Batches.h:102
__roodevice__ double extraArg(std::size_t i) const
Definition Batches.h:101
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define G(x, y, z)
__rooglobal__ void computeTruthModelCosBasis(BatchesHandle batches)
__rooglobal__ void computeExponential(BatchesHandle batches)
__rooglobal__ void computeDstD0BG(BatchesHandle batches)
__rooglobal__ void computeLandau(BatchesHandle batches)
__rooglobal__ void computeArgusBG(BatchesHandle batches)
__rooglobal__ void computeBukin(BatchesHandle batches)
__rooglobal__ void computeNovosibirsk(BatchesHandle batches)
__rooglobal__ void computeNormalizedPdf(BatchesHandle batches)
__rooglobal__ void computePolynomial(BatchesHandle batches)
__rooglobal__ void computeTruthModelSinhBasis(BatchesHandle batches)
__rooglobal__ void computePoisson(BatchesHandle batches)
__rooglobal__ void computeTruthModelCoshBasis(BatchesHandle batches)
__rooglobal__ void computeBifurGauss(BatchesHandle batches)
__rooglobal__ void computeTruthModelSinBasis(BatchesHandle batches)
__rooglobal__ void computeRatio(BatchesHandle batches)
__rooglobal__ void computeAddPdf(BatchesHandle batches)
__rooglobal__ void computeTruthModelLinBasis(BatchesHandle batches)
__rooglobal__ void computeChiSquare(BatchesHandle batches)
__rooglobal__ void computeTruthModelQuadBasis(BatchesHandle batches)
__rooglobal__ void computeDeltaFunction(BatchesHandle batches)
__rooglobal__ void computeChebychev(BatchesHandle batches)
__rooglobal__ void computeIdentity(BatchesHandle batches)
__rooglobal__ void computeLognormal(BatchesHandle batches)
__rooglobal__ void computeGaussModelExpBasis(BatchesHandle batches)
__rooglobal__ void computeVoigtian(BatchesHandle batches)
__rooglobal__ void computeTruthModelExpBasis(BatchesHandle batches)
__rooglobal__ void computeGaussian(BatchesHandle batches)
__rooglobal__ void computeBernstein(BatchesHandle batches)
__rooglobal__ void computeNegativeLogarithms(BatchesHandle batches)
__rooglobal__ void computeGamma(BatchesHandle batches)
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
__rooglobal__ void computeJohnson(BatchesHandle batches)
__rooglobal__ void computeBreitWigner(BatchesHandle batches)
__rooglobal__ void computeProdPdf(BatchesHandle batches)
__rooglobal__ void computeCBShape(BatchesHandle batches)
__rooglobal__ void computeBMixDecay(BatchesHandle batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_sin(double x)
constexpr std::size_t bufferSize
Definition Batches.h:38
__roodevice__ double fast_log(double x)
__roodevice__ double fast_cos(double x)
__roodevice__ double fast_isqrt(double x)
__roodevice__ __roohost__ STD::complex< double > faddeeva(STD::complex< double > z)
__roohost__ __roodevice__ STD::complex< double > evalCerf(double swt, double u, double c)
constexpr Double_t TwoPi()
Definition TMath.h:44
#define R1(v, w, x, y, z, i)
Definition sha1.inl:134
#define R2(v, w, x, y, z, i)
Definition sha1.inl:137
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
TMarker m
Definition textangle.C:8