summaryrefslogtreecommitdiff
path: root/libgfortran
diff options
context:
space:
mode:
Diffstat (limited to 'libgfortran')
-rw-r--r--libgfortran/ChangeLog28
-rw-r--r--libgfortran/intrinsics/random.c213
2 files changed, 127 insertions, 114 deletions
diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog
index 447ed5a42c7..7a11ca29fd3 100644
--- a/libgfortran/ChangeLog
+++ b/libgfortran/ChangeLog
@@ -1,3 +1,31 @@
+2019-08-13 Janne Blomqvist <jb@gcc.gnu.org>
+
+ PR fortran/91414
+ * intrinsics/random.c (prng_state): Update state struct.
+ (master_state): Update to match new size.
+ (get_rand_state): Update to match new PRNG.
+ (rotl): New function.
+ (xorshift1024star): Replace with prng_next.
+ (prng_next): New function.
+ (jump): Update for new PRNG.
+ (lcg_parkmiller): Replace with splitmix64.
+ (splitmix64): New function.
+ (getosrandom): Fix return value, simplify.
+ (init_rand_state): Use getosrandom only to get 8 bytes, splitmix64
+ to fill rest of state.
+ (random_r4): Update to new function and struct names.
+ (random_r8): Likewise.
+ (random_r10): Likewise.
+ (random_r16): Likewise.
+ (arandom_r4): Liekwise.
+ (arandom_r8): Likewise.
+ (arandom_r10): Likwewise.
+ (arandom_r16): Likewise.
+ (xor_keys): Reduce size to match new PRNG.
+ (random_seed_i4): Update to new function and struct names, remove
+ special handling of variable p used in previous PRNG.
+ (random_seed_i8): Likewise.
+
2019-08-07 Janne Blomqvist <jb@gcc.gnu.org>
PR fortran/53796
diff --git a/libgfortran/intrinsics/random.c b/libgfortran/intrinsics/random.c
index 7476439647c..cad21fedb57 100644
--- a/libgfortran/intrinsics/random.c
+++ b/libgfortran/intrinsics/random.c
@@ -164,7 +164,7 @@ rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
/*
- We use the xorshift1024* generator, a fast high-quality generator
+ We use the xoshiro256** generator, a fast high-quality generator
that:
- passes TestU1 without any failures
@@ -172,15 +172,15 @@ rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
- provides a "jump" function making it easy to provide many
independent parallel streams.
- - Long period of 2**1024 - 1
+ - Long period of 2**256 - 1
A description can be found at
- http://vigna.di.unimi.it/ftp/papers/xorshift.pdf
+ http://prng.di.unimi.it/
or
- http://arxiv.org/abs/1402.6246
+ https://arxiv.org/abs/1805.01407
The paper includes public domain source code which is the basis for
the implementation below.
@@ -189,10 +189,9 @@ rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
typedef struct
{
bool init;
- int p;
- uint64_t s[16];
+ uint64_t s[4];
}
-xorshift1024star_state;
+prng_state;
/* master_init, njumps, and master_state are the only variables
@@ -201,28 +200,24 @@ static bool master_init;
static unsigned njumps; /* How many times we have jumped. */
static uint64_t master_state[] = {
0xad63fa1ed3b55f36ULL, 0xd94473e78978b497ULL, 0xbc60592a98172477ULL,
- 0xa3de7c6e81265301ULL, 0x586640c5e785af27ULL, 0x7a2a3f63b67ce5eaULL,
- 0x9fde969f922d9b82ULL, 0xe6fe34379b3f3822ULL, 0x6c277eac3e99b6c2ULL,
- 0x9197290ab0d3f069ULL, 0xdb227302f6c25576ULL, 0xee0209aee527fae9ULL,
- 0x675666a793cd05b9ULL, 0xd048c99fbc70c20fULL, 0x775f8c3dba385ef5ULL,
- 0x625288bc262faf33ULL
+ 0xa3de7c6e81265301ULL
};
static __gthread_key_t rand_state_key;
-static xorshift1024star_state*
+static prng_state*
get_rand_state (void)
{
/* For single threaded apps. */
- static xorshift1024star_state rand_state;
+ static prng_state rand_state;
if (__gthread_active_p ())
{
void* p = __gthread_getspecific (rand_state_key);
if (!p)
{
- p = xcalloc (1, sizeof (xorshift1024star_state));
+ p = xcalloc (1, sizeof (prng_state));
__gthread_setspecific (rand_state_key, p);
}
return p;
@@ -231,76 +226,79 @@ get_rand_state (void)
return &rand_state;
}
+static inline uint64_t
+rotl (const uint64_t x, int k)
+{
+ return (x << k) | (x >> (64 - k));
+}
+
static uint64_t
-xorshift1024star (xorshift1024star_state* rs)
+prng_next (prng_state* rs)
{
- int p = rs->p;
- const uint64_t s0 = rs->s[p];
- uint64_t s1 = rs->s[p = (p + 1) & 15];
- s1 ^= s1 << 31;
- rs->s[p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30);
- rs->p = p;
- return rs->s[p] * UINT64_C(1181783497276652981);
+ const uint64_t result = rotl(rs->s[1] * 5, 7) * 9;
+
+ const uint64_t t = rs->s[1] << 17;
+
+ rs->s[2] ^= rs->s[0];
+ rs->s[3] ^= rs->s[1];
+ rs->s[1] ^= rs->s[2];
+ rs->s[0] ^= rs->s[3];
+
+ rs->s[2] ^= t;
+
+ rs->s[3] = rotl(rs->s[3], 45);
+
+ return result;
}
/* This is the jump function for the generator. It is equivalent to
- 2^512 calls to xorshift1024star(); it can be used to generate 2^512
+ 2^128 calls to prng_next(); it can be used to generate 2^128
non-overlapping subsequences for parallel computations. */
static void
-jump (xorshift1024star_state* rs)
+jump (prng_state* rs)
{
- static const uint64_t JUMP[] = {
- 0x84242f96eca9c41dULL, 0xa3c65b8776f96855ULL, 0x5b34a39f070b5837ULL,
- 0x4489affce4f31a1eULL, 0x2ffeeb0a48316f40ULL, 0xdc2d9891fe68c022ULL,
- 0x3659132bb12fea70ULL, 0xaac17d8efa43cab8ULL, 0xc4cb815590989b13ULL,
- 0x5ee975283d71c93bULL, 0x691548c86c1bd540ULL, 0x7910c41d10a1e6a5ULL,
- 0x0b5fc64563b3e2a8ULL, 0x047f7684e9fc949dULL, 0xb99181f2d8f685caULL,
- 0x284600e3f30e38c3ULL
- };
-
- uint64_t t[16] = { 0 };
+ static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
+
+ uint64_t s0 = 0;
+ uint64_t s1 = 0;
+ uint64_t s2 = 0;
+ uint64_t s3 = 0;
for(size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
- for(int b = 0; b < 64; b++)
- {
- if (JUMP[i] & 1ULL << b)
- for(int j = 0; j < 16; j++)
- t[j] ^= rs->s[(j + rs->p) & 15];
- xorshift1024star (rs);
+ for(int b = 0; b < 64; b++) {
+ if (JUMP[i] & UINT64_C(1) << b) {
+ s0 ^= rs->s[0];
+ s1 ^= rs->s[1];
+ s2 ^= rs->s[2];
+ s3 ^= rs->s[3];
}
- for(int j = 0; j < 16; j++)
- rs->s[(j + rs->p) & 15] = t[j];
-}
+ prng_next (rs);
+ }
+ rs->s[0] = s0;
+ rs->s[1] = s1;
+ rs->s[2] = s2;
+ rs->s[3] = s3;
+}
-/* Super-simple LCG generator used in getosrandom () if /dev/urandom
- doesn't exist. */
-#define M 2147483647 /* 2^31 - 1 (A large prime number) */
-#define A 16807 /* Prime root of M, passes statistical tests and produces a full cycle */
-#define Q 127773 /* M / A (To avoid overflow on A * seed) */
-#define R 2836 /* M % A (To avoid overflow on A * seed) */
+/* Splitmix64 recommended by xoshiro author for initializing. After
+ getting one uint64_t value from the OS, this is used to fill in the
+ rest of the xoshiro state. */
-__attribute__((unused)) static uint32_t
-lcg_parkmiller(uint32_t seed)
+static uint64_t
+splitmix64 (uint64_t x)
{
- uint32_t hi = seed / Q;
- uint32_t lo = seed % Q;
- int32_t test = A * lo - R * hi;
- if (test <= 0)
- test += M;
- return test;
+ uint64_t z = (x += 0x9e3779b97f4a7c15);
+ z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
+ z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
+ return z ^ (z >> 31);
}
-#undef M
-#undef A
-#undef Q
-#undef R
-
-/* Get some random bytes from the operating system in order to seed
+/* Get some bytes from the operating system in order to seed
the PRNG. */
static int
@@ -315,7 +313,7 @@ getosrandom (void *buf, size_t buflen)
#else
#ifdef HAVE_GETENTROPY
if (getentropy (buf, buflen) == 0)
- return 0;
+ return buflen;
#endif
int flags = O_RDONLY;
#ifdef O_CLOEXEC
@@ -328,7 +326,7 @@ getosrandom (void *buf, size_t buflen)
close (fd);
return res;
}
- uint32_t seed = 1234567890;
+ uint64_t seed = 0x047f7684e9fc949dULL;
time_t secs;
long usecs;
if (gf_gettime (&secs, &usecs) == 0)
@@ -340,13 +338,9 @@ getosrandom (void *buf, size_t buflen)
pid_t pid = getpid();
seed ^= pid;
#endif
- uint32_t* ub = buf;
- for (size_t i = 0; i < buflen / sizeof (uint32_t); i++)
- {
- ub[i] = seed;
- seed = lcg_parkmiller (seed);
- }
- return buflen;
+ size_t size = buflen < sizeof (uint64_t) ? buflen : sizeof (uint64_t);
+ memcpy (buf, &seed, size);
+ return size;
#endif /* __MINGW64_VERSION_MAJOR */
}
@@ -355,13 +349,16 @@ getosrandom (void *buf, size_t buflen)
using the master state and the number of times we must jump. */
static void
-init_rand_state (xorshift1024star_state* rs, const bool locked)
+init_rand_state (prng_state* rs, const bool locked)
{
if (!locked)
__gthread_mutex_lock (&random_lock);
if (!master_init)
{
- getosrandom (master_state, sizeof (master_state));
+ uint64_t os_seed;
+ getosrandom (&os_seed, sizeof (os_seed));
+ for (uint64_t i = 0; i < sizeof (master_state) / sizeof (uint64_t); i++)
+ master_state[i] = splitmix64 (os_seed);
njumps = 0;
master_init = true;
}
@@ -381,11 +378,11 @@ init_rand_state (xorshift1024star_state* rs, const bool locked)
void
random_r4 (GFC_REAL_4 *x)
{
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
/* Take the higher bits, ensuring that a stream of real(4), real(8),
and real(10) will be identical (except for precision). */
uint32_t high = (uint32_t) (r >> 32);
@@ -400,11 +397,11 @@ void
random_r8 (GFC_REAL_8 *x)
{
GFC_UINTEGER_8 r;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- r = xorshift1024star (rs);
+ r = prng_next (rs);
rnumber_8 (x, r);
}
iexport(random_r8);
@@ -418,11 +415,11 @@ void
random_r10 (GFC_REAL_10 *x)
{
GFC_UINTEGER_8 r;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- r = xorshift1024star (rs);
+ r = prng_next (rs);
rnumber_10 (x, r);
}
iexport(random_r10);
@@ -438,12 +435,12 @@ void
random_r16 (GFC_REAL_16 *x)
{
GFC_UINTEGER_8 r1, r2;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- r1 = xorshift1024star (rs);
- r2 = xorshift1024star (rs);
+ r1 = prng_next (rs);
+ r2 = prng_next (rs);
rnumber_16 (x, r1, r2);
}
iexport(random_r16);
@@ -463,7 +460,7 @@ arandom_r4 (gfc_array_r4 *x)
index_type stride0;
index_type dim;
GFC_REAL_4 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -486,7 +483,7 @@ arandom_r4 (gfc_array_r4 *x)
while (dest)
{
/* random_r4 (dest); */
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
uint32_t high = (uint32_t) (r >> 32);
rnumber_4 (dest, high);
@@ -530,7 +527,7 @@ arandom_r8 (gfc_array_r8 *x)
index_type stride0;
index_type dim;
GFC_REAL_8 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -553,7 +550,7 @@ arandom_r8 (gfc_array_r8 *x)
while (dest)
{
/* random_r8 (dest); */
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
rnumber_8 (dest, r);
/* Advance to the next element. */
@@ -598,7 +595,7 @@ arandom_r10 (gfc_array_r10 *x)
index_type stride0;
index_type dim;
GFC_REAL_10 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -621,7 +618,7 @@ arandom_r10 (gfc_array_r10 *x)
while (dest)
{
/* random_r10 (dest); */
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
rnumber_10 (dest, r);
/* Advance to the next element. */
@@ -668,7 +665,7 @@ arandom_r16 (gfc_array_r16 *x)
index_type stride0;
index_type dim;
GFC_REAL_16 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -691,8 +688,8 @@ arandom_r16 (gfc_array_r16 *x)
while (dest)
{
/* random_r16 (dest); */
- uint64_t r1 = xorshift1024star (rs);
- uint64_t r2 = xorshift1024star (rs);
+ uint64_t r1 = prng_next (rs);
+ uint64_t r2 = prng_next (rs);
rnumber_16 (dest, r1, r2);
/* Advance to the next element. */
@@ -734,11 +731,7 @@ arandom_r16 (gfc_array_r16 *x)
static const uint64_t xor_keys[] = {
0xbd0c5b6e50c2df49ULL, 0xd46061cd46e1df38ULL, 0xbb4f4d4ed6103544ULL,
- 0x114a583d0756ad39ULL, 0x4b5ad8623d0aaab6ULL, 0x3f2ed7afbe0c0f21ULL,
- 0xdec83fd65f113445ULL, 0x3824f8fbc4f10d24ULL, 0x5d9025af05878911ULL,
- 0x500bc46b540340e9ULL, 0x8bd53298e0d00530ULL, 0x57886e40a952e06aULL,
- 0x926e76c88e31cdb6ULL, 0xbd0724dac0a3a5f9ULL, 0xc5c8981b858ab796ULL,
- 0xbb12ab2694c2b32cULL
+ 0x114a583d0756ad39ULL
};
@@ -768,9 +761,9 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
runtime_error ("RANDOM_SEED should have at most one argument present.");
if (size != NULL)
- *size = SZ + 1;
+ *size = SZ;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
/* Return the seed to GET data. */
if (get != NULL)
@@ -780,7 +773,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
runtime_error ("Array rank of GET is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ)
runtime_error ("Array size of GET is too small.");
if (!rs->init)
@@ -794,9 +787,6 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
memcpy (&(get->base_addr[(SZ - 1 - i) * GFC_DESCRIPTOR_STRIDE(get,0)]),
(unsigned char*) seed + i * sizeof(GFC_UINTEGER_4),
sizeof(GFC_UINTEGER_4));
-
- /* Finally copy the value of p after the seed. */
- get->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(get, 0)] = rs->p;
}
else
@@ -818,7 +808,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
runtime_error ("Array rank of PUT is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ)
runtime_error ("Array size of PUT is too small.");
/* We copy the seed given by the user. */
@@ -833,8 +823,6 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
njumps = 0;
master_init = true;
init_rand_state (rs, true);
-
- rs->p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
}
__gthread_mutex_unlock (&random_lock);
@@ -855,9 +843,9 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
#define SZ (sizeof (master_state) / sizeof (GFC_INTEGER_8))
if (size != NULL)
- *size = SZ + 1;
+ *size = SZ;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
/* Return the seed to GET data. */
if (get != NULL)
@@ -867,7 +855,7 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
runtime_error ("Array rank of GET is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ)
runtime_error ("Array size of GET is too small.");
if (!rs->init)
@@ -880,8 +868,6 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
for (size_t i = 0; i < SZ; i++)
memcpy (&(get->base_addr[i * GFC_DESCRIPTOR_STRIDE(get,0)]), &seed[i],
sizeof (GFC_UINTEGER_8));
-
- get->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(get, 0)] = rs->p;
}
else
@@ -903,7 +889,7 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
runtime_error ("Array rank of PUT is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ)
runtime_error ("Array size of PUT is too small.");
/* This code now should do correct strides. */
@@ -915,7 +901,6 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
njumps = 0;
master_init = true;
init_rand_state (rs, true);
- rs->p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
}