Geant4 Cross Reference |
1 // 1 // 2 // -*- C++ -*- 2 // -*- C++ -*- 3 // 3 // 4 // ------------------------------------------- 4 // ----------------------------------------------------------------------- 5 // HEP Random 5 // HEP Random 6 // --- RanluxppEngine -- 6 // --- RanluxppEngine --- 7 // helper implementation f 7 // helper implementation file 8 // ------------------------------------------- 8 // ----------------------------------------------------------------------- 9 9 10 #ifndef RANLUXPP_HELPERS_H 10 #ifndef RANLUXPP_HELPERS_H 11 #define RANLUXPP_HELPERS_H 11 #define RANLUXPP_HELPERS_H 12 12 13 #include <cstdint> 13 #include <cstdint> 14 14 15 /// Compute `a + b` and set `overflow` accordi 15 /// Compute `a + b` and set `overflow` accordingly. 16 static inline uint64_t add_overflow(uint64_t a 16 static inline uint64_t add_overflow(uint64_t a, uint64_t b, 17 unsigned & 17 unsigned &overflow) { 18 uint64_t add = a + b; 18 uint64_t add = a + b; 19 overflow = (add < a); 19 overflow = (add < a); 20 return add; 20 return add; 21 } 21 } 22 22 23 /// Compute `a + b` and increment `carry` if t 23 /// Compute `a + b` and increment `carry` if there was an overflow 24 static inline uint64_t add_carry(uint64_t a, u 24 static inline uint64_t add_carry(uint64_t a, uint64_t b, unsigned &carry) { 25 unsigned overflow; 25 unsigned overflow; 26 uint64_t add = add_overflow(a, b, overflow); 26 uint64_t add = add_overflow(a, b, overflow); 27 // Do NOT branch on overflow to avoid jumpin 27 // Do NOT branch on overflow to avoid jumping code, just add 0 if there was 28 // no overflow. 28 // no overflow. 29 carry += overflow; 29 carry += overflow; 30 return add; 30 return add; 31 } 31 } 32 32 33 /// Compute `a - b` and set `overflow` accordi 33 /// Compute `a - b` and set `overflow` accordingly 34 static inline uint64_t sub_overflow(uint64_t a 34 static inline uint64_t sub_overflow(uint64_t a, uint64_t b, 35 unsigned & 35 unsigned &overflow) { 36 uint64_t sub = a - b; 36 uint64_t sub = a - b; 37 overflow = (sub > a); 37 overflow = (sub > a); 38 return sub; 38 return sub; 39 } 39 } 40 40 41 /// Compute `a - b` and increment `carry` if t 41 /// Compute `a - b` and increment `carry` if there was an overflow 42 static inline uint64_t sub_carry(uint64_t a, u 42 static inline uint64_t sub_carry(uint64_t a, uint64_t b, unsigned &carry) { 43 unsigned overflow; 43 unsigned overflow; 44 uint64_t sub = sub_overflow(a, b, overflow); 44 uint64_t sub = sub_overflow(a, b, overflow); 45 // Do NOT branch on overflow to avoid jumpin 45 // Do NOT branch on overflow to avoid jumping code, just add 0 if there was 46 // no overflow. 46 // no overflow. 47 carry += overflow; 47 carry += overflow; 48 return sub; 48 return sub; 49 } 49 } 50 50 51 /// Update r = r - (t1 + t2) + (t3 + t2) * b * 51 /// Update r = r - (t1 + t2) + (t3 + t2) * b ** 10 52 /// 52 /// 53 /// This function also yields cbar = floor(r / 53 /// This function also yields cbar = floor(r / m) as its return value (int64_t 54 /// because the value can be -1). With an init 54 /// because the value can be -1). With an initial value of r = t0, this can 55 /// be used for computing the remainder after 55 /// be used for computing the remainder after division by m (see the function 56 /// mod_m in mulmod.h). The function to_ranlux 56 /// mod_m in mulmod.h). The function to_ranlux passes r = 0 and uses only the 57 /// return value to obtain the decimal expansi 57 /// return value to obtain the decimal expansion after divison by m. 58 static inline int64_t compute_r(const uint64_t 58 static inline int64_t compute_r(const uint64_t *upper, uint64_t *r) { 59 // Subtract t1 (24 * 24 = 576 bits) 59 // Subtract t1 (24 * 24 = 576 bits) 60 unsigned carry = 0; 60 unsigned carry = 0; 61 for (int i = 0; i < 9; i++) { 61 for (int i = 0; i < 9; i++) { 62 uint64_t r_i = r[i]; 62 uint64_t r_i = r[i]; 63 r_i = sub_overflow(r_i, carry, carry); 63 r_i = sub_overflow(r_i, carry, carry); 64 64 65 uint64_t t1_i = upper[i]; 65 uint64_t t1_i = upper[i]; 66 r_i = sub_carry(r_i, t1_i, carry); 66 r_i = sub_carry(r_i, t1_i, carry); 67 r[i] = r_i; 67 r[i] = r_i; 68 } 68 } 69 int64_t c = -((int64_t)carry); 69 int64_t c = -((int64_t)carry); 70 70 71 // Subtract t2 (only 240 bits, so need to ex 71 // Subtract t2 (only 240 bits, so need to extend) 72 carry = 0; 72 carry = 0; 73 for (int i = 0; i < 9; i++) { 73 for (int i = 0; i < 9; i++) { 74 uint64_t r_i = r[i]; 74 uint64_t r_i = r[i]; 75 r_i = sub_overflow(r_i, carry, carry); 75 r_i = sub_overflow(r_i, carry, carry); 76 76 77 uint64_t t2_bits = 0; 77 uint64_t t2_bits = 0; 78 if (i < 4) { 78 if (i < 4) { 79 t2_bits += upper[i + 5] >> 16; 79 t2_bits += upper[i + 5] >> 16; 80 if (i < 3) { 80 if (i < 3) { 81 t2_bits += upper[i + 6] << 48; 81 t2_bits += upper[i + 6] << 48; 82 } 82 } 83 } 83 } 84 r_i = sub_carry(r_i, t2_bits, carry); 84 r_i = sub_carry(r_i, t2_bits, carry); 85 r[i] = r_i; 85 r[i] = r_i; 86 } 86 } 87 c -= carry; 87 c -= carry; 88 88 89 // r += (t3 + t2) * 2 ** 240 89 // r += (t3 + t2) * 2 ** 240 90 carry = 0; 90 carry = 0; 91 { 91 { 92 uint64_t r_3 = r[3]; 92 uint64_t r_3 = r[3]; 93 // 16 upper bits 93 // 16 upper bits 94 uint64_t t2_bits = (upper[5] >> 16) << 48; 94 uint64_t t2_bits = (upper[5] >> 16) << 48; 95 uint64_t t3_bits = (upper[0] << 48); 95 uint64_t t3_bits = (upper[0] << 48); 96 96 97 r_3 = add_carry(r_3, t2_bits, carry); 97 r_3 = add_carry(r_3, t2_bits, carry); 98 r_3 = add_carry(r_3, t3_bits, carry); 98 r_3 = add_carry(r_3, t3_bits, carry); 99 99 100 r[3] = r_3; 100 r[3] = r_3; 101 } 101 } 102 for (int i = 0; i < 3; i++) { 102 for (int i = 0; i < 3; i++) { 103 uint64_t r_i = r[i + 4]; 103 uint64_t r_i = r[i + 4]; 104 r_i = add_overflow(r_i, carry, carry); 104 r_i = add_overflow(r_i, carry, carry); 105 105 106 uint64_t t2_bits = (upper[5 + i] >> 32) + 106 uint64_t t2_bits = (upper[5 + i] >> 32) + (upper[6 + i] << 32); 107 uint64_t t3_bits = (upper[i] >> 16) + (upp 107 uint64_t t3_bits = (upper[i] >> 16) + (upper[1 + i] << 48); 108 108 109 r_i = add_carry(r_i, t2_bits, carry); 109 r_i = add_carry(r_i, t2_bits, carry); 110 r_i = add_carry(r_i, t3_bits, carry); 110 r_i = add_carry(r_i, t3_bits, carry); 111 111 112 r[i + 4] = r_i; 112 r[i + 4] = r_i; 113 } 113 } 114 { 114 { 115 uint64_t r_7 = r[7]; 115 uint64_t r_7 = r[7]; 116 r_7 = add_overflow(r_7, carry, carry); 116 r_7 = add_overflow(r_7, carry, carry); 117 117 118 uint64_t t2_bits = (upper[8] >> 32); 118 uint64_t t2_bits = (upper[8] >> 32); 119 uint64_t t3_bits = (upper[3] >> 16) + (upp 119 uint64_t t3_bits = (upper[3] >> 16) + (upper[4] << 48); 120 120 121 r_7 = add_carry(r_7, t2_bits, carry); 121 r_7 = add_carry(r_7, t2_bits, carry); 122 r_7 = add_carry(r_7, t3_bits, carry); 122 r_7 = add_carry(r_7, t3_bits, carry); 123 123 124 r[7] = r_7; 124 r[7] = r_7; 125 } 125 } 126 { 126 { 127 uint64_t r_8 = r[8]; 127 uint64_t r_8 = r[8]; 128 r_8 = add_overflow(r_8, carry, carry); 128 r_8 = add_overflow(r_8, carry, carry); 129 129 130 uint64_t t3_bits = (upper[4] >> 16) + (upp 130 uint64_t t3_bits = (upper[4] >> 16) + (upper[5] << 48); 131 131 132 r_8 = add_carry(r_8, t3_bits, carry); 132 r_8 = add_carry(r_8, t3_bits, carry); 133 133 134 r[8] = r_8; 134 r[8] = r_8; 135 } 135 } 136 c += carry; 136 c += carry; 137 137 138 // c = floor(r / 2 ** 576) has been computed 138 // c = floor(r / 2 ** 576) has been computed along the way via the carry 139 // flags. Now if c = 0 and the value current 139 // flags. Now if c = 0 and the value currently stored in r is greater or 140 // equal to m, we need cbar = 1 and subtract 140 // equal to m, we need cbar = 1 and subtract m, otherwise cbar = c. The 141 // value currently in r is greater or equal 141 // value currently in r is greater or equal to m, if and only if one of 142 // the last 240 bits is set and the upper bi 142 // the last 240 bits is set and the upper bits are all set. 143 bool greater_m = r[0] | r[1] | r[2] | (r[3] 143 bool greater_m = r[0] | r[1] | r[2] | (r[3] & 0x0000ffffffffffff); 144 greater_m &= (r[3] >> 48) == 0xffff; 144 greater_m &= (r[3] >> 48) == 0xffff; 145 for (int i = 4; i < 9; i++) { 145 for (int i = 4; i < 9; i++) { 146 greater_m &= (r[i] == UINT64_MAX); 146 greater_m &= (r[i] == UINT64_MAX); 147 } 147 } 148 return c + (c == 0 && greater_m); 148 return c + (c == 0 && greater_m); 149 } 149 } 150 150 151 #endif 151 #endif 152 152