33 #define __MIXMAX_C // do NOT define it in your own program, just include mixmax.h
59 for (
int i = 0; i < niter; ++i)
67 inline uint64_t
MULWU (uint64_t k){ (
void)k;
return 0;}
69 #error SPECIALMUL not undefined
79 Y[0] = ( tempV = sumtotOld);
80 myuint sumtot = Y[0], ovflow = 0;
85 tempP =
modadd(tempP,Y[i]);
88 tempP =
modadd(tempP , Y[i]);
89 tempV =
modadd(tempV , tempP);
92 sumtot += tempV;
if (sumtot < tempV) {ovflow++;}
96 Y[2] =
modadd( Y[2] , temp2 );
97 sumtot += temp2;
if (sumtot < temp2) {ovflow++;}
110 int niter = nskip + 1;
126 #if defined(__SSE__) && defined(USE_INLINE_ASM)
128 __asm__ (
"pxor %0, %0; "
146 for (i=0; i<(n/M); i++){
151 unsigned int rem=(n % M);
154 for (j=0; j< (rem); j++){
172 myuint sumtot = 0, ovflow = 0;
175 tempP =
modadd(tempP,Y[i]);
176 Y[i] = ( tempV =
modadd(tempV,tempP) );
177 sumtot += tempV;
if (sumtot < tempV) {ovflow++;}
182 Y[2] =
modadd( Y[2] , temp2 );
183 sumtot += temp2;
if (sumtot < temp2) {ovflow++;}
189 #if defined(__x86_64__) && defined(USE_INLINE_ASM)
192 __asm__ (
"addq %2, %0; "
229 for ( i=0; i <
N; i++){
231 sumtot += X->
V[(i)];
if (sumtot < X->V[(i)]) {ovflow++;}
242 for (i=0; i <
N; i++){
257 const myuint MULT64=6364136223846793005ULL;
261 fprintf(stderr,
" try seeding with nonzero seed next time!\n");
269 for (i=0; i <
N; i++){
270 l*=MULT64; l = (l << 32) ^ (l>>32);
272 sumtot += X->
V[(i)];
if (sumtot < X->V[(i)]) {ovflow++;}
282 for (i=0; i <
N; i++){
302 #if defined(__x86_64__)
303 inline myuint mod128(__uint128_t s){
311 temp = (__uint128_t)a*(__uint128_t)b + cum;
315 #else // on all other platforms, including 32-bit linux, PPC and PPC64 and all Windows
316 #define MASK32 0xFFFFFFFFULL
320 register myuint o,ph,pl,ah,al;
326 o = (o &
M61) + ((ph*ah)<<3) + ((ah*pl+al*ph + ((al*pl)>>32))>>29) ;
328 o = (o &
M61) + ((o>>61));
335 fprintf(X->
fh,
"mixmax state, file version 1.0\n" );
338 fprintf(X->
fh,
"%llu, ", X->
V[j] );
341 fprintf(X->
fh,
"}; " );
342 fprintf(X->
fh,
"counter=%u; ", X->
counter );
343 fprintf(X->
fh,
"sumtot=%llu;\n", X->
sumtot );
349 if( ( fin = fopen(filename,
"r") ) ){
356 fprintf(stderr,
"mixmax -> read_state: error reading file %s\n", filename);
362 if (!fscanf(fin,
"%llu", &X->
V[0]) ) {fprintf(stderr,
"mixmax -> read_state: error reading file %s\n", filename); exit(
ERROR_READING_STATE_FILE);}
366 if (!fscanf(fin,
", %llu", &vecVal) ) {fprintf(stderr,
"mixmax -> read_state: error reading vector component i=%d from file %s\n", i, filename); exit(
ERROR_READING_STATE_FILE);}
371 fprintf(stderr,
"mixmax -> read_state: Invalid state vector value= %llu"
372 " ( must be less than %llu ) "
373 " obtained from reading file %s\n"
379 unsigned int counter;
380 if (!fscanf( fin,
"}; counter=%u; ", &counter)){fprintf(stderr,
"mixmax -> read_state: error reading counter from file %s\n", filename); exit(
ERROR_READING_STATE_FILE);}
384 fprintf(stderr,
"mixmax -> read_state: Invalid counter = %d"
385 " Must be 0 <= counter < %u\n" , counter,
N);
391 if (!fscanf( fin,
"sumtot=%llu\n", &sumtot)){fprintf(stderr,
"mixmax -> read_state: error reading checksum from file %s\n", filename); exit(
ERROR_READING_STATE_FILE);}
393 if (X->
sumtot != sumtot) {
394 fprintf(stderr,
"mixmax -> checksum error while reading state from file %s - corrupted?\n", filename);
402 #define FUSEDMODMULVEC \
403 { for (i =0; i<N; i++){ \
404 cum[i] = fmodmulM61( cum[i], coeff , Y[i] ) ; \
410 #if ( ( (N==88)||(N==256) ||(N==1000) || (N==3150) ) && BITS==61 && SKIPISON!=0)
446 #include "mixmax_skip_N88.icc"
450 #include "mixmax_skip_N1000.icc"
452 #include "mixmax_skip_N3150.icc"
456 myID_t IDvec[4] = {streamID, runID, machineID, clusterID};
465 for (i=0; i<
N; i++) { Y[i] = Vin[i]; sumtot =
modadd( sumtot, Vin[i]); } ;
466 for (IDindex=0; IDindex<4; IDindex++) {
472 rowPtr = (
myuint*)skipMat[r + IDindex*8*
sizeof(
myID_t)];
474 for (i=0; i<
N; i++){ cum[i] = 0; }
482 for (i=0; i<
N; i++){ Y[i] = cum[i]; sumtot =
modadd( sumtot, cum[i]); } ;
488 for (i=0; i<
N; i++){ Vout[i] = Y[i]; sumtot =
modadd( sumtot, Y[i]); } ;
492 #warning For this N, we dont have the skipping coefficients yet, using alternative method to seed
499 Xin->
V[4] = (
myuint)clusterID << 5;
500 Xin->
V[5] = (
myuint)machineID << 7;
501 Xin->
V[6] = (
myuint)runID << 11;
502 Xin->
V[7] = (
myuint)streamID << 13;
rng_state_t * rng_copy(myuint *Y)
#define ERROR_READING_STATE_FILE
void set_skip_number(int n)
void branch_inplace(rng_state_t *Xin, myID_t *IDvec)
static const char * filename()
int rng_free(rng_state_t *X)
rng_state_t * rng_alloc()
double get_next_float(rng_state_t *X)
Vc_ALWAYS_INLINE void free(T *p)
Frees memory that was allocated with Vc::malloc.
#define ARRAY_INDEX_OUT_OF_BOUNDS
std::map< std::string, std::string >::const_iterator iter
void set_first_return_element(int n)
myuint get_next(rng_state_t *X)
void seed_uniquestream(rng_state_t *Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID)
void seed_vielbein(rng_state_t *X, unsigned int index)
myuint fmodmulM61(myuint cum, myuint s, myuint a)
void read_state(rng_state_t *X, const char filename[])
#define ERROR_READING_STATE_CHECKSUM
void seed_spbox(rng_state_t *X, myuint seed)
void print_state(rng_state_t *X)
myuint apply_bigskip(myuint *Vout, myuint *Vin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID)
uint64_t MULWU(uint64_t k)
void iterate_and_fill_array(rng_state_t *X, double *array)
#define INV_MERSBASE
#define INV_MERSBASE (0x1p-61)
myuint precalc(rng_state_t *X)
int get_first_return_element()
myuint iterate_raw_vec(myuint *Y, myuint sumtotOld)
typedef void((*Func_t)())
int iterate(rng_state_t *X)
std::complex< float_v > Z
#define ERROR_READING_STATE_COUNTER
Vc_ALWAYS_INLINE_L T *Vc_ALWAYS_INLINE_R malloc(size_t n)
Allocates memory on the Heap with alignment and padding suitable for vectorized access.
void fill_array(rng_state_t *X, unsigned int n, double *array)
myuint modadd(myuint foo, myuint bar)