37#define BEGIN blockDim.x *blockIdx.x + threadIdx.x
38#define STEP blockDim.x *gridDim.x
52 for (
int pdf = 1; pdf < nPdfs; pdf++)
59 Batch m = batches[0], m0 = batches[1],
c = batches[2], p = batches[3];
61 const double t =
m[i] / m0[i];
62 const double u = 1 - t * t;
76 const int degree = nCoef - 1;
79 Batch xData = batches[0];
82 double binomial = 1.0;
83 for (
int k = 0; k < nCoef; k++) {
85 binomial = (binomial * (degree - k)) / (k + 1);
91 powX[i] = pow_1_X[i] = 1.0;
98 for (
int k = 2; k <= degree; k += 2)
100 pow_1_X[i] *= _1_X[i] * _1_X[i];
104 pow_1_X[i] *= _1_X[i];
108 _1_X[i] = 1 / _1_X[i];
110 for (
int k = 0; k < nCoef; k++)
116 pow_1_X[i] *= _1_X[i];
122 double powX = 1.0, pow_1_X = 1.0;
123 for (
int k = 1; k <= degree; k++)
125 const double _1_X = 1 / (1 - X);
126 for (
int k = 0; k < nCoef; k++) {
135 for (
int k = 0; k < nCoef; k++) {
137 binomial = (binomial * (degree - k)) / (k + 1);
143 Batch X = batches[0], M = batches[1], SL = batches[2], SR = batches[3];
145 double arg = X[i] - M[i];
156 Batch X = batches[0], M = batches[1], W = batches[2];
158 const double arg = X[i] - M[i];
159 batches.
_output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
165 Batch X = batches[0], XP = batches[1], SP = batches[2], XI = batches[3],
R1 = batches[4],
R2 = batches[5];
166 const double r3 = log(2.0);
167 const double r6 = exp(-6.0);
168 const double r7 = 2 * sqrt(2 * log(2.0));
171 const double r1 = XI[i] *
fast_isqrt(XI[i] * XI[i] + 1);
172 const double r4 = 1 /
fast_isqrt(XI[i] * XI[i] + 1);
173 const double hp = 1 / (SP[i] * r7);
174 const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
175 const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
178 if (XI[i] > r6 || XI[i] < -r6)
181 double factor = 1,
y = X[i] -
x1, Yp = XP[i] -
x1, yi = r4 - XI[i], rho =
R1[i];
190 batches.
_output[i] = rho *
y *
y / Yp / Yp - r3 + factor * 4 * r3 *
y * hp * r5 * r4 / yi / yi;
191 if (X[i] >=
x1 && X[i] <
x2) {
193 fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) /
fast_log(1 + 2 * XI[i] * (XI[i] - r4));
196 if (X[i] >=
x1 && X[i] <
x2 && XI[i] < r6 && XI[i] > -r6)
197 batches.
_output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
205 Batch M = batches[0], M0 = batches[1], S = batches[2], A = batches[3],
N = batches[4];
207 const double t = (M[i] - M0[i]) / S[i];
208 if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i]))
209 batches.
_output[i] = -0.5 * t * t;
211 batches.
_output[i] =
N[i] / (
N[i] - A[i] * A[i] - A[i] * t);
214 batches.
_output[i] -= 0.5 * A[i] * A[i];
223 Batch xData = batches[0];
234 prev[i][0] = batches.
_output[i] = 1.0;
237 for (
int k = 0; k < nCoef; k++)
242 const double next = 2 * X[i] * prev[i][1] - prev[i][0];
243 prev[i][0] = prev[i][1];
248 double prev0 = 1.0, prev1 = 2 * (xData[i] - 0.5 * (
xmax +
xmin)) / (
xmax -
xmin), X = prev1;
250 for (
int k = 0; k < nCoef; k++) {
254 const double next = 2 * X * prev1 - prev0;
263 Batch X = batches[0];
264 const double ndof = batches.
extraArg(0);
265 const double gamma = 1 / std::tgamma(ndof / 2.0);
269 constexpr double ln2 = 0.693147180559945309417232121458;
271 double arg = (ndof - 2) *
fast_log(X[i]) - X[i] - ndof * ln2;
278 Batch DM = batches[0], DM0 = batches[1], C = batches[2], A = batches[3], B = batches[4];
280 const double ratio = DM[i] / DM0[i];
281 const double arg1 = (DM0[i] - DM[i]) / C[i];
282 const double arg2 = A[i] *
fast_log(ratio);
293 Batch x = batches[0],
c = batches[1];
300 Batch X = batches[0],
G = batches[1], B = batches[2], M = batches[3];
301 double gamma = -std::lgamma(
G[0]);
304 batches.
_output[i] = (
G[i] == 1.0) / B[i];
305 else if (
G.isItVector())
306 batches.
_output[i] = -std::lgamma(
G[i]);
312 const double invBeta = 1 / B[i];
313 double arg = (X[i] - M[i]) * invBeta;
316 batches.
_output[i] += arg * (
G[i] - 1);
324 auto x = batches[0], mean = batches[1],
sigma = batches[2];
326 const double arg =
x[i] - mean[i];
327 const double halfBySigmaSq = -0.5 / (
sigma[i] *
sigma[i]);
339 batches.
_output[i] *= batches[1][i];
344 Batch mass = batches[0], mu = batches[1], lambda = batches[2], gamma = batches[3], delta = batches[4];
346 const double massThreshold = batches.
extraArg(0);
349 const double arg = (mass[i] - mu[i]) / lambda[i];
353 const double asinh_arg = asinh(arg);
355 const double expo = gamma[i] + delta[i] * asinh_arg;
356 const double result =
357 delta[i] *
fast_exp(-0.5 * expo * expo) *
fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
359 const double passThrough = mass[i] >= massThreshold;
360 batches.
_output[i] = result * passThrough;
370 auto case0 = [](
double x) {
371 const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
373 return 0.3989422803 *
fast_exp(-1 / u - 0.5 * (
x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
375 auto case1 = [](
double x) {
376 constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
377 constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
379 return fast_exp(-u - 0.5 * (
x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] *
x) *
x) *
x) *
x) /
380 (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] *
x) *
x) *
x) *
x);
382 auto case2 = [](
double x) {
383 constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
384 constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
385 return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] *
x) *
x) *
x) *
x) /
386 (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] *
x) *
x) *
x) *
x);
388 auto case3 = [](
double x) {
389 constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
390 constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
391 return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] *
x) *
x) *
x) *
x) /
392 (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] *
x) *
x) *
x) *
x);
394 auto case4 = [](
double x) {
395 constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
396 constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
397 const double u = 1 /
x;
398 return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
399 (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
401 auto case5 = [](
double x) {
402 constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
403 constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
404 const double u = 1 /
x;
405 return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
406 (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
408 auto case6 = [](
double x) {
409 constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
410 constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
411 const double u = 1 /
x;
412 return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
413 (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
415 auto case7 = [](
double x) {
416 const double a2[2] = {-1.845568670, -4.284640743};
418 return u * u * (1 + (a2[0] + a2[1] * u) * u);
421 Batch X = batches[0], M = batches[1], S = batches[2];
424 batches.
_output[i] = (X[i] - M[i]) / S[i];
429 else if (batches.
_output[i] < -5.5)
431 else if (batches.
_output[i] < -1.0)
433 else if (batches.
_output[i] < 1.0)
435 else if (batches.
_output[i] < 5.0)
437 else if (batches.
_output[i] < 12.0)
439 else if (batches.
_output[i] < 50.0)
441 else if (batches.
_output[i] < 300.)
449 Batch X = batches[0], M0 = batches[1], K = batches[2];
450 const double rootOf2pi = 2.506628274631000502415765284811;
452 double lnxOverM0 =
fast_log(X[i] / M0[i]);
456 double arg = lnxOverM0 / lnk;
472 Batch X = batches[0], P = batches[1], W = batches[2], T = batches[3];
473 constexpr double xi = 2.3548200450309494;
475 double argasinh = 0.5 * xi * T[i];
476 double argln = argasinh + 1 /
fast_isqrt(argasinh * argasinh + 1);
479 double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
481 batches.
_output[i] = ln / asinh;
483 batches.
_output[i] -= 2.0 / xi / xi * asinh * asinh;
493 Batch x = batches[0], mean = batches[1];
494 bool protectNegative = batches.
extraArg(0);
495 bool noRounding = batches.
extraArg(1);
497 const double x_i = noRounding ?
x[i] : floor(
x[i]);
498 batches.
_output[i] = std::lgamma(x_i + 1.);
502 const double x_i = noRounding ?
x[i] : floor(
x[i]);
503 const double logMean =
fast_log(mean[i]);
504 const double logPoisson = x_i * logMean - mean[i] - batches.
_output[i];
513 if (protectNegative && mean[i] < 0)
520 Batch X = batches[0];
522 const int lowestOrder = batches.
extraArg(nCoef);
525 batches.
_output[i] = (lowestOrder > 0.0);
536 for (
int k = nCoef - 3; k >= 0; k -= 2)
546 if (lowestOrder != 0) {
547 for (
int k = 2; k <= lowestOrder; k += 2)
549 batches.
_output[i] *= X[i] * X[i];
552 if (lowestOrder % 2 == 1)
561 const int nPdfs = batches.
extraArg(0);
564 for (
int pdf = 0; pdf < nPdfs; pdf++)
566 batches.
_output[i] *= batches[pdf][i];
572 batches.
_output[i] = batches[0][i] / batches[1][i];
577 Batch X = batches[0], M = batches[1], W = batches[2], S = batches[3];
578 const double invSqrt2 = 0.707106781186547524400844362105;
580 const double arg = (X[i] - M[i]) * (X[i] - M[i]);
581 if (S[i] == 0.0 && W[i] == 0.0)
583 else if (S[i] == 0.0)
584 batches.
_output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
585 else if (W[i] == 0.0)
588 batches.
_output[i] = invSqrt2 / S[i];
592 if (S[i] != 0.0 && W[i] != 0.0) {
595 const double factor = W[i] > 0.0 ? 0.5 : -0.5;
596 std::complex<double> z(batches.
_output[i] * (X[i] - M[i]), factor * batches.
_output[i] * W[i]);
typedef void(GLAPIENTRYP _GLUfuncptr)(void)
static const double x2[5]
static const double x1[5]
__roodevice__ void setExtraArg(uint8_t i, double val)
__roodevice__ size_t getNEvents() const
__roodevice__ uint8_t getNExtraArgs() const
__roodevice__ double extraArg(uint8_t i) const
__rooglobal__ void computeExponential(BatchesHandle batches)
__rooglobal__ void computeDstD0BG(BatchesHandle batches)
__rooglobal__ void computeLandau(BatchesHandle batches)
__rooglobal__ void computeArgusBG(BatchesHandle batches)
__rooglobal__ void computeBukin(BatchesHandle batches)
__rooglobal__ void computeNovosibirsk(BatchesHandle batches)
__rooglobal__ void computePolynomial(BatchesHandle batches)
__rooglobal__ void computePoisson(BatchesHandle batches)
__rooglobal__ void computeBifurGauss(BatchesHandle batches)
__rooglobal__ void computeRatio(BatchesHandle batches)
__rooglobal__ void computeAddPdf(BatchesHandle batches)
__rooglobal__ void computeChiSquare(BatchesHandle batches)
__rooglobal__ void computeChebychev(BatchesHandle batches)
__rooglobal__ void computeLognormal(BatchesHandle batches)
__rooglobal__ void computeVoigtian(BatchesHandle batches)
__rooglobal__ void computeGaussian(BatchesHandle batches)
__rooglobal__ void computeBernstein(BatchesHandle batches)
__rooglobal__ void computeNegativeLogarithms(BatchesHandle batches)
__rooglobal__ void computeGamma(BatchesHandle batches)
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
__rooglobal__ void computeJohnson(BatchesHandle batches)
__rooglobal__ void computeBreitWigner(BatchesHandle batches)
__rooglobal__ void computeProdPdf(BatchesHandle batches)
__rooglobal__ void computeCBShape(BatchesHandle batches)
Namespace for dispatching RooFit computations to various backends.
__roodevice__ double fast_exp(double x)
__roodevice__ double fast_log(double x)
constexpr uint16_t bufferSize
__roodevice__ double fast_isqrt(double x)
constexpr Double_t TwoPi()
__roodevice__ __roohost__ std::complex< double > faddeeva(std::complex< double > z)
#define R1(v, w, x, y, z, i)
#define R2(v, w, x, y, z, i)