12#ifndef RANLUXPP_MULMOD_H 
   13#define RANLUXPP_MULMOD_H 
   24static void multiply9x9(
const uint64_t *in1, 
const uint64_t *in2, uint64_t *out)
 
   27   unsigned nextCarry = 0;
 
   29#if defined(__clang__) || defined(__INTEL_COMPILER) || defined(__CUDACC__) 
   31#elif defined(__GNUC__) && __GNUC__ >= 8 
   35   for (
int i = 0; i < 18; i++) {
 
   36      uint64_t current = next;
 
   37      unsigned carry = nextCarry;
 
   42#if defined(__clang__) || defined(__INTEL_COMPILER) || defined(__CUDACC__) 
   44#elif defined(__GNUC__) && __GNUC__ >= 8 
   48      for (
int j = 0; j < 9; j++) {
 
   53         uint64_t fac1 = in1[j];
 
   54         uint64_t fac2 = in2[k];
 
   55#if defined(__SIZEOF_INT128__) && !defined(ROOT_NO_INT128) 
   56         unsigned __int128 prod = fac1;
 
   59         uint64_t upper = prod >> 64;
 
   60         uint64_t lower = 
static_cast<uint64_t
>(prod);
 
   62         uint64_t upper1 = fac1 >> 32;
 
   63         uint64_t lower1 = 
static_cast<uint32_t
>(fac1);
 
   65         uint64_t upper2 = fac2 >> 32;
 
   66         uint64_t lower2 = 
static_cast<uint32_t
>(fac2);
 
   70         uint64_t upper = upper1 * upper2;
 
   71         uint64_t middle1 = upper1 * lower2;
 
   72         uint64_t middle2 = lower1 * upper2;
 
   73         uint64_t lower = lower1 * lower2;
 
   78         uint64_t middle = 
add_overflow(middle1, middle2, overflow);
 
   89         uint64_t overflow_add = overflow * (uint64_t(1) << 32);
 
   94         upper += overflow_add;
 
   96         uint64_t middle_upper = middle >> 32;
 
   97         uint64_t middle_lower = middle << 32;
 
  114         upper += middle_upper;
 
  118         current = 
add_carry(current, lower, carry);
 
  121         next = 
add_carry(next, upper, nextCarry);
 
  124      next = 
add_carry(next, carry, nextCarry);
 
  138static void mod_m(
const uint64_t *mul, uint64_t *out)
 
  142   for (
int i = 0; i < 9; i++) {
 
  170   uint64_t c_unsigned = 
static_cast<uint64_t
>(
c);
 
  173   int64_t t2 = t0 - (c_unsigned << 48);
 
  177   int64_t 
t1 = t2 >> 48;
 
  186   for (
int i = 1; i < 3; i++) {
 
  190      uint64_t out_i = 
sub_carry(r_i, t0, carry);
 
  197      uint64_t out_3 = 
sub_carry(r_3, t2, carry);
 
  200   for (
int i = 4; i < 9; i++) {
 
  215static void mulmod(
const uint64_t *in1, uint64_t *inout)
 
  217   uint64_t mul[2 * 9] = {0};
 
  229static void powermod(
const uint64_t *base, uint64_t *res, uint64_t 
n)
 
  231   uint64_t fac[9] = {0};
 
  234   for (
int i = 1; i < 9; i++) {
 
  239   uint64_t mul[18] = {0};
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
 
static uint64_t sub_overflow(uint64_t a, uint64_t b, unsigned &overflow)
Compute a - b and set overflow accordingly.
 
static uint64_t sub_carry(uint64_t a, uint64_t b, unsigned &carry)
Compute a - b and increment carry if there was an overflow.
 
static uint64_t add_carry(uint64_t a, uint64_t b, unsigned &carry)
Compute a + b and increment carry if there was an overflow.
 
static int64_t compute_r(const uint64_t *upper, uint64_t *r)
Update r = r - (t1 + t2) + (t3 + t2) * b ** 10.
 
static uint64_t add_overflow(uint64_t a, uint64_t b, unsigned &overflow)
Compute a + b and set overflow accordingly.
 
static void mulmod(const uint64_t *in1, uint64_t *inout)
Combine multiply9x9 and mod_m with internal temporary storage.
 
static void powermod(const uint64_t *base, uint64_t *res, uint64_t n)
Compute base to the n modulo m.
 
static void mod_m(const uint64_t *mul, uint64_t *out)
Compute a value congruent to mul modulo m less than 2 ** 576.
 
static void multiply9x9(const uint64_t *in1, const uint64_t *in2, uint64_t *out)
Multiply two 576 bit numbers, stored as 9 numbers of 64 bits each.