Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ComputeFunctions.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, Summer 2019
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file ComputeFunctions.cxx
15\ingroup Roobatchcompute
16
17This file contains vectorizable computation functions for PDFs and other Roofit objects.
18The same source file can also be compiled with nvcc. All functions have a single `Batches`
19object as an argument passed by value, which contains all the information necessary for the
20computation. In case of cuda computations, the loops have a step (stride) the size of the grid
21which allows for reusing the same code as the cpu implementations, easier debugging and in terms
22of performance, maximum memory coalescing. For more details, see
23https://developer.nvidia.com/blog/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
24**/
25
26#include "RooBatchCompute.h"
27#include "RooVDTHeaders.h"
28#include "Batches.h"
29
30#include <TMath.h>
31
32#include <complex>
33
34#include "faddeeva_impl.h"
35
36#ifdef __CUDACC__
37#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
38#define STEP blockDim.x *gridDim.x
39#else
40#define BEGIN 0
41#define STEP 1
42#endif // #ifdef __CUDACC__
43
44namespace RooBatchCompute {
45namespace RF_ARCH {
46
48{
49 const int nPdfs = batches.getNExtraArgs();
50 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
51 batches._output[i] = batches.extraArg(0) * batches[0][i];
52 for (int pdf = 1; pdf < nPdfs; pdf++)
53 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
54 batches._output[i] += batches.extraArg(pdf) * batches[pdf][i];
55}
56
58{
59 Batch m = batches[0], m0 = batches[1], c = batches[2], p = batches[3];
60 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
61 const double t = m[i] / m0[i];
62 const double u = 1 - t * t;
63 batches._output[i] = c[i] * u + p[i] * fast_log(u);
64 }
65 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
66 if (m[i] >= m0[i])
67 batches._output[i] = 0.0;
68 else
69 batches._output[i] = m[i] * fast_exp(batches._output[i]);
70 }
71}
72
74{
75 const int nCoef = batches.getNExtraArgs() - 2;
76 const int degree = nCoef - 1;
77 const double xmin = batches.extraArg(nCoef);
78 const double xmax = batches.extraArg(nCoef + 1);
79 Batch xData = batches[0];
80
81 // apply binomial coefficient in-place so we don't have to allocate new memory
82 double binomial = 1.0;
83 for (int k = 0; k < nCoef; k++) {
84 batches.setExtraArg(k, batches.extraArg(k) * binomial);
85 binomial = (binomial * (degree - k)) / (k + 1);
86 }
87
88 if (STEP == 1) {
89 double X[bufferSize], _1_X[bufferSize], powX[bufferSize], pow_1_X[bufferSize];
90 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
91 powX[i] = pow_1_X[i] = 1.0;
92 X[i] = (xData[i] - xmin) / (xmax - xmin);
93 _1_X[i] = 1 - X[i];
94 batches._output[i] = 0.0;
95 }
96
97 // raising 1-x to the power of degree
98 for (int k = 2; k <= degree; k += 2)
99 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
100 pow_1_X[i] *= _1_X[i] * _1_X[i];
101
102 if (degree % 2 == 1)
103 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
104 pow_1_X[i] *= _1_X[i];
105
106 // inverting 1-x ---> 1/(1-x)
107 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
108 _1_X[i] = 1 / _1_X[i];
109
110 for (int k = 0; k < nCoef; k++)
111 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
112 batches._output[i] += batches.extraArg(k) * powX[i] * pow_1_X[i];
113
114 // calculating next power for x and 1-x
115 powX[i] *= X[i];
116 pow_1_X[i] *= _1_X[i];
117 }
118 } else
119 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
120 batches._output[i] = 0.0;
121 const double X = (xData[i] - xmin) / (xmax - xmin);
122 double powX = 1.0, pow_1_X = 1.0;
123 for (int k = 1; k <= degree; k++)
124 pow_1_X *= 1 - X;
125 const double _1_X = 1 / (1 - X);
126 for (int k = 0; k < nCoef; k++) {
127 batches._output[i] += batches.extraArg(k) * powX * pow_1_X;
128 powX *= X;
129 pow_1_X *= _1_X;
130 }
131 }
132
133 // reset extraArgs values so we don't mutate the Batches object
134 binomial = 1.0;
135 for (int k = 0; k < nCoef; k++) {
136 batches.setExtraArg(k, batches.extraArg(k) / binomial);
137 binomial = (binomial * (degree - k)) / (k + 1);
138 }
139}
140
142{
143 Batch X = batches[0], M = batches[1], SL = batches[2], SR = batches[3];
144 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
145 double arg = X[i] - M[i];
146 if (arg < 0)
147 arg /= SL[i];
148 else
149 arg /= SR[i];
150 batches._output[i] = fast_exp(-0.5 * arg * arg);
151 }
152}
153
155{
156 Batch X = batches[0], M = batches[1], W = batches[2];
157 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
158 const double arg = X[i] - M[i];
159 batches._output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
160 }
161}
162
164{
165 Batch X = batches[0], XP = batches[1], SP = batches[2], XI = batches[3], R1 = batches[4], R2 = batches[5];
166 const double r3 = log(2.0);
167 const double r6 = exp(-6.0);
168 const double r7 = 2 * sqrt(2 * log(2.0));
169
170 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
171 const double r1 = XI[i] * fast_isqrt(XI[i] * XI[i] + 1);
172 const double r4 = 1 / fast_isqrt(XI[i] * XI[i] + 1);
173 const double hp = 1 / (SP[i] * r7);
174 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
175 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
176
177 double r5 = 1.0;
178 if (XI[i] > r6 || XI[i] < -r6)
179 r5 = XI[i] / fast_log(r4 + XI[i]);
180
181 double factor = 1, y = X[i] - x1, Yp = XP[i] - x1, yi = r4 - XI[i], rho = R1[i];
182 if (X[i] >= x2) {
183 factor = -1;
184 y = X[i] - x2;
185 Yp = XP[i] - x2;
186 yi = r4 + XI[i];
187 rho = R2[i];
188 }
189
190 batches._output[i] = rho * y * y / Yp / Yp - r3 + factor * 4 * r3 * y * hp * r5 * r4 / yi / yi;
191 if (X[i] >= x1 && X[i] < x2) {
192 batches._output[i] =
193 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / fast_log(1 + 2 * XI[i] * (XI[i] - r4));
194 batches._output[i] *= -batches._output[i] * r3;
195 }
196 if (X[i] >= x1 && X[i] < x2 && XI[i] < r6 && XI[i] > -r6)
197 batches._output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
198 }
199 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
200 batches._output[i] = fast_exp(batches._output[i]);
201}
202
204{
205 Batch M = batches[0], M0 = batches[1], S = batches[2], A = batches[3], N = batches[4];
206 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
207 const double t = (M[i] - M0[i]) / S[i];
208 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i]))
209 batches._output[i] = -0.5 * t * t;
210 else {
211 batches._output[i] = N[i] / (N[i] - A[i] * A[i] - A[i] * t);
212 batches._output[i] = fast_log(batches._output[i]);
213 batches._output[i] *= N[i];
214 batches._output[i] -= 0.5 * A[i] * A[i];
215 }
216 }
217 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
218 batches._output[i] = fast_exp(batches._output[i]);
219}
220
222{
223 Batch xData = batches[0];
224 const int nCoef = batches.getNExtraArgs() - 2;
225 const double xmin = batches.extraArg(nCoef);
226 const double xmax = batches.extraArg(nCoef + 1);
227
228 if (STEP == 1) {
229 double prev[bufferSize][2], X[bufferSize];
230
231 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
232 // set a0-->prev[i][0] and a1-->prev[i][1]
233 // and x tranfsformed to range[-1..1]-->X[i]
234 prev[i][0] = batches._output[i] = 1.0;
235 prev[i][1] = X[i] = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin);
236 }
237 for (int k = 0; k < nCoef; k++)
238 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
239 batches._output[i] += prev[i][1] * batches.extraArg(k);
240
241 // compute next order
242 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
243 prev[i][0] = prev[i][1];
244 prev[i][1] = next;
245 }
246 } else
247 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
248 double prev0 = 1.0, prev1 = 2 * (xData[i] - 0.5 * (xmax + xmin)) / (xmax - xmin), X = prev1;
249 batches._output[i] = 1.0;
250 for (int k = 0; k < nCoef; k++) {
251 batches._output[i] += prev1 * batches.extraArg(k);
252
253 // compute next order
254 const double next = 2 * X * prev1 - prev0;
255 prev0 = prev1;
256 prev1 = next;
257 }
258 }
259}
260
262{
263 Batch X = batches[0];
264 const double ndof = batches.extraArg(0);
265 const double gamma = 1 / std::tgamma(ndof / 2.0);
266 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
267 batches._output[i] = gamma;
268
269 constexpr double ln2 = 0.693147180559945309417232121458;
270 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
271 double arg = (ndof - 2) * fast_log(X[i]) - X[i] - ndof * ln2;
272 batches._output[i] *= fast_exp(0.5 * arg);
273 }
274}
275
277{
278 Batch DM = batches[0], DM0 = batches[1], C = batches[2], A = batches[3], B = batches[4];
279 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
280 const double ratio = DM[i] / DM0[i];
281 const double arg1 = (DM0[i] - DM[i]) / C[i];
282 const double arg2 = A[i] * fast_log(ratio);
283 batches._output[i] = (1 - fast_exp(arg1)) * fast_exp(arg2) + B[i] * (ratio - 1);
284 }
285
286 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
287 if (batches._output[i] < 0)
288 batches._output[i] = 0;
289}
290
292{
293 Batch x = batches[0], c = batches[1];
294 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
295 batches._output[i] = fast_exp(x[i] * c[i]);
296}
297
299{
300 Batch X = batches[0], G = batches[1], B = batches[2], M = batches[3];
301 double gamma = -std::lgamma(G[0]);
302 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
303 if (X[i] == M[i])
304 batches._output[i] = (G[i] == 1.0) / B[i];
305 else if (G.isItVector())
306 batches._output[i] = -std::lgamma(G[i]);
307 else
308 batches._output[i] = gamma;
309
310 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
311 if (X[i] != M[i]) {
312 const double invBeta = 1 / B[i];
313 double arg = (X[i] - M[i]) * invBeta;
314 batches._output[i] -= arg;
315 arg = fast_log(arg);
316 batches._output[i] += arg * (G[i] - 1);
317 batches._output[i] = fast_exp(batches._output[i]);
318 batches._output[i] *= invBeta;
319 }
320}
321
323{
324 auto x = batches[0], mean = batches[1], sigma = batches[2];
325 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
326 const double arg = x[i] - mean[i];
327 const double halfBySigmaSq = -0.5 / (sigma[i] * sigma[i]);
328 batches._output[i] = fast_exp(arg * arg * halfBySigmaSq);
329 }
330}
331
333{
334 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
335 batches._output[i] = -fast_log(batches[0][i]);
336 // Multiply by weights if they exist
337 if (batches.extraArg(0))
338 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
339 batches._output[i] *= batches[1][i];
340}
341
343{
344 Batch mass = batches[0], mu = batches[1], lambda = batches[2], gamma = batches[3], delta = batches[4];
345 const double sqrtTwoPi = std::sqrt(TMath::TwoPi());
346 const double massThreshold = batches.extraArg(0);
347
348 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
349 const double arg = (mass[i] - mu[i]) / lambda[i];
350#ifdef R__HAS_VDT
351 const double asinh_arg = fast_log(arg + 1 / fast_isqrt(arg * arg + 1));
352#else
353 const double asinh_arg = asinh(arg);
354#endif
355 const double expo = gamma[i] + delta[i] * asinh_arg;
356 const double result =
357 delta[i] * fast_exp(-0.5 * expo * expo) * fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
358
359 const double passThrough = mass[i] >= massThreshold;
360 batches._output[i] = result * passThrough;
361 }
362}
363
364/* Actual computation of Landau(x,mean,sigma) in a vectorization-friendly way
365 * Code copied from function landau_pdf (math/mathcore/src/PdfFuncMathCore.cxx)
366 * and rewritten to enable vectorization.
367 */
369{
370 auto case0 = [](double x) {
371 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
372 const double u = fast_exp(x + 1.0);
373 return 0.3989422803 * fast_exp(-1 / u - 0.5 * (x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
374 };
375 auto case1 = [](double x) {
376 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
377 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
378 const double u = fast_exp(-x - 1);
379 return fast_exp(-u - 0.5 * (x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * x) * x) * x) * x) /
380 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * x) * x) * x) * x);
381 };
382 auto case2 = [](double x) {
383 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
384 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
385 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * x) * x) * x) * x) /
386 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * x) * x) * x) * x);
387 };
388 auto case3 = [](double x) {
389 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
390 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
391 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * x) * x) * x) * x) /
392 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * x) * x) * x) * x);
393 };
394 auto case4 = [](double x) {
395 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
396 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
397 const double u = 1 / x;
398 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
399 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
400 };
401 auto case5 = [](double x) {
402 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
403 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
404 const double u = 1 / x;
405 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
406 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
407 };
408 auto case6 = [](double x) {
409 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
410 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
411 const double u = 1 / x;
412 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
413 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
414 };
415 auto case7 = [](double x) {
416 const double a2[2] = {-1.845568670, -4.284640743};
417 const double u = 1 / (x - x * fast_log(x) / (x + 1));
418 return u * u * (1 + (a2[0] + a2[1] * u) * u);
419 };
420
421 Batch X = batches[0], M = batches[1], S = batches[2];
422
423 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
424 batches._output[i] = (X[i] - M[i]) / S[i];
425
426 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
427 if (S[i] <= 0.0)
428 batches._output[i] = 0;
429 else if (batches._output[i] < -5.5)
430 batches._output[i] = case0(batches._output[i]);
431 else if (batches._output[i] < -1.0)
432 batches._output[i] = case1(batches._output[i]);
433 else if (batches._output[i] < 1.0)
434 batches._output[i] = case2(batches._output[i]);
435 else if (batches._output[i] < 5.0)
436 batches._output[i] = case3(batches._output[i]);
437 else if (batches._output[i] < 12.0)
438 batches._output[i] = case4(batches._output[i]);
439 else if (batches._output[i] < 50.0)
440 batches._output[i] = case5(batches._output[i]);
441 else if (batches._output[i] < 300.)
442 batches._output[i] = case6(batches._output[i]);
443 else
444 batches._output[i] = case7(batches._output[i]);
445}
446
448{
449 Batch X = batches[0], M0 = batches[1], K = batches[2];
450 const double rootOf2pi = 2.506628274631000502415765284811;
451 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
452 double lnxOverM0 = fast_log(X[i] / M0[i]);
453 double lnk = fast_log(K[i]);
454 if (lnk < 0)
455 lnk = -lnk;
456 double arg = lnxOverM0 / lnk;
457 arg *= -0.5 * arg;
458 batches._output[i] = fast_exp(arg) / (X[i] * lnk * rootOf2pi);
459 }
460}
461
462/* TMath::ASinH(x) needs to be replaced with ln( x + sqrt(x^2+1))
463 * argasinh -> the argument of TMath::ASinH()
464 * argln -> the argument of the logarithm that replaces AsinH
465 * asinh -> the value that the function evaluates to
466 *
467 * ln is the logarithm that was solely present in the initial
468 * formula, that is before the asinh replacement
469 */
471{
472 Batch X = batches[0], P = batches[1], W = batches[2], T = batches[3];
473 constexpr double xi = 2.3548200450309494; // 2 Sqrt( Ln(4) )
474 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
475 double argasinh = 0.5 * xi * T[i];
476 double argln = argasinh + 1 / fast_isqrt(argasinh * argasinh + 1);
477 double asinh = fast_log(argln);
478
479 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
480 double ln = fast_log(argln2);
481 batches._output[i] = ln / asinh;
482 batches._output[i] *= -0.125 * xi * xi * batches._output[i];
483 batches._output[i] -= 2.0 / xi / xi * asinh * asinh;
484 }
485
486 // faster if you exponentiate in a seperate loop (dark magic!)
487 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
488 batches._output[i] = fast_exp(batches._output[i]);
489}
490
492{
493 Batch x = batches[0], mean = batches[1];
494 bool protectNegative = batches.extraArg(0);
495 bool noRounding = batches.extraArg(1);
496 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
497 const double x_i = noRounding ? x[i] : floor(x[i]);
498 batches._output[i] = std::lgamma(x_i + 1.);
499 }
500
501 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
502 const double x_i = noRounding ? x[i] : floor(x[i]);
503 const double logMean = fast_log(mean[i]);
504 const double logPoisson = x_i * logMean - mean[i] - batches._output[i];
505 batches._output[i] = fast_exp(logPoisson);
506
507 // Cosmetics
508 if (x_i < 0)
509 batches._output[i] = 0;
510 else if (x_i == 0)
511 batches._output[i] = 1 / fast_exp(mean[i]);
512
513 if (protectNegative && mean[i] < 0)
514 batches._output[i] = 1.E-3;
515 }
516}
517
519{
520 Batch X = batches[0];
521 const int nCoef = batches.getNExtraArgs() - 1;
522 const int lowestOrder = batches.extraArg(nCoef);
523 if (nCoef == 0) {
524 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
525 batches._output[i] = (lowestOrder > 0.0);
526 return;
527 } else
528 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
529 batches._output[i] = batches.extraArg(nCoef - 1);
530
531 /* Indexes are in range 0..nCoef-1 but coefList[nCoef-1]
532 * has already been processed. In order to traverse the list,
533 * with step of 2 we have to start at index nCoef-3 and use
534 * coefList[k+1] and coefList[k]
535 */
536 for (int k = nCoef - 3; k >= 0; k -= 2)
537 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
538 batches._output[i] = X[i] * (batches._output[i] * X[i] + batches.extraArg(k + 1)) + batches.extraArg(k);
539
540 // If nCoef is even, then the coefList[0] didn't get processed
541 if (nCoef % 2 == 0)
542 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
543 batches._output[i] = batches._output[i] * X[i] + batches.extraArg(0);
544
545 // Increase the order of the polynomial, first by myltiplying with X[i]^2
546 if (lowestOrder != 0) {
547 for (int k = 2; k <= lowestOrder; k += 2)
548 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
549 batches._output[i] *= X[i] * X[i];
550
551 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
552 if (lowestOrder % 2 == 1)
553 batches._output[i] *= X[i];
554 batches._output[i] += 1.0;
555 }
556 }
557}
558
560{
561 const int nPdfs = batches.extraArg(0);
562 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
563 batches._output[i] = 1.;
564 for (int pdf = 0; pdf < nPdfs; pdf++)
565 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
566 batches._output[i] *= batches[pdf][i];
567}
568
570{
571 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
572 batches._output[i] = batches[0][i] / batches[1][i];
573}
574
576{
577 Batch X = batches[0], M = batches[1], W = batches[2], S = batches[3];
578 const double invSqrt2 = 0.707106781186547524400844362105;
579 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP) {
580 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
581 if (S[i] == 0.0 && W[i] == 0.0)
582 batches._output[i] = 1.0;
583 else if (S[i] == 0.0)
584 batches._output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
585 else if (W[i] == 0.0)
586 batches._output[i] = fast_exp(-0.5 * arg / (S[i] * S[i]));
587 else
588 batches._output[i] = invSqrt2 / S[i];
589 }
590
591 for (size_t i = BEGIN; i < batches.getNEvents(); i += STEP)
592 if (S[i] != 0.0 && W[i] != 0.0) {
593 if (batches._output[i] < 0)
594 batches._output[i] = -batches._output[i];
595 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
596 std::complex<double> z(batches._output[i] * (X[i] - M[i]), factor * batches._output[i] * W[i]);
597 batches._output[i] *= faddeeva_impl::faddeeva(z).real();
598 }
599}
600
601/// Returns a std::vector of pointers to the compute functions in this file.
602std::vector<void (*)(BatchesHandle)> getFunctions()
603{
604 return {computeAddPdf,
627}
628} // End namespace RF_ARCH
629} // End namespace RooBatchCompute
#define STEP
#define BEGIN
typedef void(GLAPIENTRYP _GLUfuncptr)(void)
#define c(i)
Definition RSha256.hxx:101
#define __rooglobal__
static const double x2[5]
static const double x1[5]
#define N
float xmin
float xmax
__roodevice__ void setExtraArg(uint8_t i, double val)
Definition Batches.h:102
__roodevice__ size_t getNEvents() const
Definition Batches.h:99
__roodevice__ uint8_t getNExtraArgs() const
Definition Batches.h:100
__roodevice__ double extraArg(uint8_t i) const
Definition Batches.h:101
const Double_t sigma
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
#define G(x, y, z)
__rooglobal__ void computeExponential(BatchesHandle batches)
__rooglobal__ void computeDstD0BG(BatchesHandle batches)
__rooglobal__ void computeLandau(BatchesHandle batches)
__rooglobal__ void computeArgusBG(BatchesHandle batches)
__rooglobal__ void computeBukin(BatchesHandle batches)
__rooglobal__ void computeNovosibirsk(BatchesHandle batches)
__rooglobal__ void computePolynomial(BatchesHandle batches)
__rooglobal__ void computePoisson(BatchesHandle batches)
__rooglobal__ void computeBifurGauss(BatchesHandle batches)
__rooglobal__ void computeRatio(BatchesHandle batches)
__rooglobal__ void computeAddPdf(BatchesHandle batches)
__rooglobal__ void computeChiSquare(BatchesHandle batches)
__rooglobal__ void computeChebychev(BatchesHandle batches)
__rooglobal__ void computeLognormal(BatchesHandle batches)
__rooglobal__ void computeVoigtian(BatchesHandle batches)
__rooglobal__ void computeGaussian(BatchesHandle batches)
__rooglobal__ void computeBernstein(BatchesHandle batches)
__rooglobal__ void computeNegativeLogarithms(BatchesHandle batches)
__rooglobal__ void computeGamma(BatchesHandle batches)
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
__rooglobal__ void computeJohnson(BatchesHandle batches)
__rooglobal__ void computeBreitWigner(BatchesHandle batches)
__rooglobal__ void computeProdPdf(BatchesHandle batches)
__rooglobal__ void computeCBShape(BatchesHandle batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_log(double x)
constexpr uint16_t bufferSize
Definition Batches.h:38
__roodevice__ double fast_isqrt(double x)
constexpr Double_t TwoPi()
Definition TMath.h:44
__roodevice__ __roohost__ std::complex< double > faddeeva(std::complex< double > z)
#define R1(v, w, x, y, z, i)
Definition sha1.inl:134
#define R2(v, w, x, y, z, i)
Definition sha1.inl:137
auto * m
Definition textangle.C:8