summaryrefslogtreecommitdiff
path: root/libgfortran
diff options
context:
space:
mode:
authorJanne Blomqvist <jb@gcc.gnu.org>2019-08-13 11:24:43 +0300
committerJanne Blomqvist <jb@gcc.gnu.org>2019-08-13 11:24:43 +0300
commit0e99e0933984e0c30fda1d089bfbd6857fc9273f (patch)
tree3f4221e36f548096ee6f0e64dab1b2660ceda1c6 /libgfortran
parent519acab098317f6eab49b612e49d8fcc6562da8f (diff)
PR fortran/91414: Improved PRNG
Update the PRNG from xorshift1024* to xoshiro256** by the same author. For details see http://prng.di.unimi.it/ and the paper at https://arxiv.org/abs/1805.01407 Also the seeding is slightly improved, by reading only 8 bytes from the operating system and using the simple splitmix64 PRNG to fill in the rest of the PRNG state (as recommended by the xoshiro author), instead of reading the entire state from the OS. Regtested on x86_64-pc-linux-gnu, Ok for trunk? gcc/fortran/ChangeLog: 2019-08-13 Janne Blomqvist <jb@gcc.gnu.org> PR fortran/91414 * check.c (gfc_check_random_seed): Reduce seed_size. * intrinsic.texi (RANDOM_NUMBER): Update to match new PRNG. gcc/testsuite/ChangeLog: 2019-08-13 Janne Blomqvist <jb@gcc.gnu.org> PR fortran/91414 * gfortran.dg/random_seed_1.f90: Update to match new seed size. libgfortran/ChangeLog: 2019-08-13 Janne Blomqvist <jb@gcc.gnu.org> PR fortran/91414 * intrinsics/random.c (prng_state): Update state struct. (master_state): Update to match new size. (get_rand_state): Update to match new PRNG. (rotl): New function. (xorshift1024star): Replace with prng_next. (prng_next): New function. (jump): Update for new PRNG. (lcg_parkmiller): Replace with splitmix64. (splitmix64): New function. (getosrandom): Fix return value, simplify. (init_rand_state): Use getosrandom only to get 8 bytes, splitmix64 to fill rest of state. (random_r4): Update to new function and struct names. (random_r8): Likewise. (random_r10): Likewise. (random_r16): Likewise. (arandom_r4): Liekwise. (arandom_r8): Likewise. (arandom_r10): Likwewise. (arandom_r16): Likewise. (xor_keys): Reduce size to match new PRNG. (random_seed_i4): Update to new function and struct names, remove special handling of variable p used in previous PRNG. (random_seed_i8): Likewise. From-SVN: r274361
Diffstat (limited to 'libgfortran')
-rw-r--r--libgfortran/ChangeLog28
-rw-r--r--libgfortran/intrinsics/random.c213
2 files changed, 127 insertions, 114 deletions
diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog
index 447ed5a42c7..7a11ca29fd3 100644
--- a/libgfortran/ChangeLog
+++ b/libgfortran/ChangeLog
@@ -1,3 +1,31 @@
+2019-08-13 Janne Blomqvist <jb@gcc.gnu.org>
+
+ PR fortran/91414
+ * intrinsics/random.c (prng_state): Update state struct.
+ (master_state): Update to match new size.
+ (get_rand_state): Update to match new PRNG.
+ (rotl): New function.
+ (xorshift1024star): Replace with prng_next.
+ (prng_next): New function.
+ (jump): Update for new PRNG.
+ (lcg_parkmiller): Replace with splitmix64.
+ (splitmix64): New function.
+ (getosrandom): Fix return value, simplify.
+ (init_rand_state): Use getosrandom only to get 8 bytes, splitmix64
+ to fill rest of state.
+ (random_r4): Update to new function and struct names.
+ (random_r8): Likewise.
+ (random_r10): Likewise.
+ (random_r16): Likewise.
+ (arandom_r4): Liekwise.
+ (arandom_r8): Likewise.
+ (arandom_r10): Likwewise.
+ (arandom_r16): Likewise.
+ (xor_keys): Reduce size to match new PRNG.
+ (random_seed_i4): Update to new function and struct names, remove
+ special handling of variable p used in previous PRNG.
+ (random_seed_i8): Likewise.
+
2019-08-07 Janne Blomqvist <jb@gcc.gnu.org>
PR fortran/53796
diff --git a/libgfortran/intrinsics/random.c b/libgfortran/intrinsics/random.c
index 7476439647c..cad21fedb57 100644
--- a/libgfortran/intrinsics/random.c
+++ b/libgfortran/intrinsics/random.c
@@ -164,7 +164,7 @@ rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
/*
- We use the xorshift1024* generator, a fast high-quality generator
+ We use the xoshiro256** generator, a fast high-quality generator
that:
- passes TestU1 without any failures
@@ -172,15 +172,15 @@ rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
- provides a "jump" function making it easy to provide many
independent parallel streams.
- - Long period of 2**1024 - 1
+ - Long period of 2**256 - 1
A description can be found at
- http://vigna.di.unimi.it/ftp/papers/xorshift.pdf
+ http://prng.di.unimi.it/
or
- http://arxiv.org/abs/1402.6246
+ https://arxiv.org/abs/1805.01407
The paper includes public domain source code which is the basis for
the implementation below.
@@ -189,10 +189,9 @@ rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2)
typedef struct
{
bool init;
- int p;
- uint64_t s[16];
+ uint64_t s[4];
}
-xorshift1024star_state;
+prng_state;
/* master_init, njumps, and master_state are the only variables
@@ -201,28 +200,24 @@ static bool master_init;
static unsigned njumps; /* How many times we have jumped. */
static uint64_t master_state[] = {
0xad63fa1ed3b55f36ULL, 0xd94473e78978b497ULL, 0xbc60592a98172477ULL,
- 0xa3de7c6e81265301ULL, 0x586640c5e785af27ULL, 0x7a2a3f63b67ce5eaULL,
- 0x9fde969f922d9b82ULL, 0xe6fe34379b3f3822ULL, 0x6c277eac3e99b6c2ULL,
- 0x9197290ab0d3f069ULL, 0xdb227302f6c25576ULL, 0xee0209aee527fae9ULL,
- 0x675666a793cd05b9ULL, 0xd048c99fbc70c20fULL, 0x775f8c3dba385ef5ULL,
- 0x625288bc262faf33ULL
+ 0xa3de7c6e81265301ULL
};
static __gthread_key_t rand_state_key;
-static xorshift1024star_state*
+static prng_state*
get_rand_state (void)
{
/* For single threaded apps. */
- static xorshift1024star_state rand_state;
+ static prng_state rand_state;
if (__gthread_active_p ())
{
void* p = __gthread_getspecific (rand_state_key);
if (!p)
{
- p = xcalloc (1, sizeof (xorshift1024star_state));
+ p = xcalloc (1, sizeof (prng_state));
__gthread_setspecific (rand_state_key, p);
}
return p;
@@ -231,76 +226,79 @@ get_rand_state (void)
return &rand_state;
}
+static inline uint64_t
+rotl (const uint64_t x, int k)
+{
+ return (x << k) | (x >> (64 - k));
+}
+
static uint64_t
-xorshift1024star (xorshift1024star_state* rs)
+prng_next (prng_state* rs)
{
- int p = rs->p;
- const uint64_t s0 = rs->s[p];
- uint64_t s1 = rs->s[p = (p + 1) & 15];
- s1 ^= s1 << 31;
- rs->s[p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30);
- rs->p = p;
- return rs->s[p] * UINT64_C(1181783497276652981);
+ const uint64_t result = rotl(rs->s[1] * 5, 7) * 9;
+
+ const uint64_t t = rs->s[1] << 17;
+
+ rs->s[2] ^= rs->s[0];
+ rs->s[3] ^= rs->s[1];
+ rs->s[1] ^= rs->s[2];
+ rs->s[0] ^= rs->s[3];
+
+ rs->s[2] ^= t;
+
+ rs->s[3] = rotl(rs->s[3], 45);
+
+ return result;
}
/* This is the jump function for the generator. It is equivalent to
- 2^512 calls to xorshift1024star(); it can be used to generate 2^512
+ 2^128 calls to prng_next(); it can be used to generate 2^128
non-overlapping subsequences for parallel computations. */
static void
-jump (xorshift1024star_state* rs)
+jump (prng_state* rs)
{
- static const uint64_t JUMP[] = {
- 0x84242f96eca9c41dULL, 0xa3c65b8776f96855ULL, 0x5b34a39f070b5837ULL,
- 0x4489affce4f31a1eULL, 0x2ffeeb0a48316f40ULL, 0xdc2d9891fe68c022ULL,
- 0x3659132bb12fea70ULL, 0xaac17d8efa43cab8ULL, 0xc4cb815590989b13ULL,
- 0x5ee975283d71c93bULL, 0x691548c86c1bd540ULL, 0x7910c41d10a1e6a5ULL,
- 0x0b5fc64563b3e2a8ULL, 0x047f7684e9fc949dULL, 0xb99181f2d8f685caULL,
- 0x284600e3f30e38c3ULL
- };
-
- uint64_t t[16] = { 0 };
+ static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
+
+ uint64_t s0 = 0;
+ uint64_t s1 = 0;
+ uint64_t s2 = 0;
+ uint64_t s3 = 0;
for(size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
- for(int b = 0; b < 64; b++)
- {
- if (JUMP[i] & 1ULL << b)
- for(int j = 0; j < 16; j++)
- t[j] ^= rs->s[(j + rs->p) & 15];
- xorshift1024star (rs);
+ for(int b = 0; b < 64; b++) {
+ if (JUMP[i] & UINT64_C(1) << b) {
+ s0 ^= rs->s[0];
+ s1 ^= rs->s[1];
+ s2 ^= rs->s[2];
+ s3 ^= rs->s[3];
}
- for(int j = 0; j < 16; j++)
- rs->s[(j + rs->p) & 15] = t[j];
-}
+ prng_next (rs);
+ }
+ rs->s[0] = s0;
+ rs->s[1] = s1;
+ rs->s[2] = s2;
+ rs->s[3] = s3;
+}
-/* Super-simple LCG generator used in getosrandom () if /dev/urandom
- doesn't exist. */
-#define M 2147483647 /* 2^31 - 1 (A large prime number) */
-#define A 16807 /* Prime root of M, passes statistical tests and produces a full cycle */
-#define Q 127773 /* M / A (To avoid overflow on A * seed) */
-#define R 2836 /* M % A (To avoid overflow on A * seed) */
+/* Splitmix64 recommended by xoshiro author for initializing. After
+ getting one uint64_t value from the OS, this is used to fill in the
+ rest of the xoshiro state. */
-__attribute__((unused)) static uint32_t
-lcg_parkmiller(uint32_t seed)
+static uint64_t
+splitmix64 (uint64_t x)
{
- uint32_t hi = seed / Q;
- uint32_t lo = seed % Q;
- int32_t test = A * lo - R * hi;
- if (test <= 0)
- test += M;
- return test;
+ uint64_t z = (x += 0x9e3779b97f4a7c15);
+ z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
+ z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
+ return z ^ (z >> 31);
}
-#undef M
-#undef A
-#undef Q
-#undef R
-
-/* Get some random bytes from the operating system in order to seed
+/* Get some bytes from the operating system in order to seed
the PRNG. */
static int
@@ -315,7 +313,7 @@ getosrandom (void *buf, size_t buflen)
#else
#ifdef HAVE_GETENTROPY
if (getentropy (buf, buflen) == 0)
- return 0;
+ return buflen;
#endif
int flags = O_RDONLY;
#ifdef O_CLOEXEC
@@ -328,7 +326,7 @@ getosrandom (void *buf, size_t buflen)
close (fd);
return res;
}
- uint32_t seed = 1234567890;
+ uint64_t seed = 0x047f7684e9fc949dULL;
time_t secs;
long usecs;
if (gf_gettime (&secs, &usecs) == 0)
@@ -340,13 +338,9 @@ getosrandom (void *buf, size_t buflen)
pid_t pid = getpid();
seed ^= pid;
#endif
- uint32_t* ub = buf;
- for (size_t i = 0; i < buflen / sizeof (uint32_t); i++)
- {
- ub[i] = seed;
- seed = lcg_parkmiller (seed);
- }
- return buflen;
+ size_t size = buflen < sizeof (uint64_t) ? buflen : sizeof (uint64_t);
+ memcpy (buf, &seed, size);
+ return size;
#endif /* __MINGW64_VERSION_MAJOR */
}
@@ -355,13 +349,16 @@ getosrandom (void *buf, size_t buflen)
using the master state and the number of times we must jump. */
static void
-init_rand_state (xorshift1024star_state* rs, const bool locked)
+init_rand_state (prng_state* rs, const bool locked)
{
if (!locked)
__gthread_mutex_lock (&random_lock);
if (!master_init)
{
- getosrandom (master_state, sizeof (master_state));
+ uint64_t os_seed;
+ getosrandom (&os_seed, sizeof (os_seed));
+ for (uint64_t i = 0; i < sizeof (master_state) / sizeof (uint64_t); i++)
+ master_state[i] = splitmix64 (os_seed);
njumps = 0;
master_init = true;
}
@@ -381,11 +378,11 @@ init_rand_state (xorshift1024star_state* rs, const bool locked)
void
random_r4 (GFC_REAL_4 *x)
{
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
/* Take the higher bits, ensuring that a stream of real(4), real(8),
and real(10) will be identical (except for precision). */
uint32_t high = (uint32_t) (r >> 32);
@@ -400,11 +397,11 @@ void
random_r8 (GFC_REAL_8 *x)
{
GFC_UINTEGER_8 r;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- r = xorshift1024star (rs);
+ r = prng_next (rs);
rnumber_8 (x, r);
}
iexport(random_r8);
@@ -418,11 +415,11 @@ void
random_r10 (GFC_REAL_10 *x)
{
GFC_UINTEGER_8 r;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- r = xorshift1024star (rs);
+ r = prng_next (rs);
rnumber_10 (x, r);
}
iexport(random_r10);
@@ -438,12 +435,12 @@ void
random_r16 (GFC_REAL_16 *x)
{
GFC_UINTEGER_8 r1, r2;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
if (unlikely (!rs->init))
init_rand_state (rs, false);
- r1 = xorshift1024star (rs);
- r2 = xorshift1024star (rs);
+ r1 = prng_next (rs);
+ r2 = prng_next (rs);
rnumber_16 (x, r1, r2);
}
iexport(random_r16);
@@ -463,7 +460,7 @@ arandom_r4 (gfc_array_r4 *x)
index_type stride0;
index_type dim;
GFC_REAL_4 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -486,7 +483,7 @@ arandom_r4 (gfc_array_r4 *x)
while (dest)
{
/* random_r4 (dest); */
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
uint32_t high = (uint32_t) (r >> 32);
rnumber_4 (dest, high);
@@ -530,7 +527,7 @@ arandom_r8 (gfc_array_r8 *x)
index_type stride0;
index_type dim;
GFC_REAL_8 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -553,7 +550,7 @@ arandom_r8 (gfc_array_r8 *x)
while (dest)
{
/* random_r8 (dest); */
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
rnumber_8 (dest, r);
/* Advance to the next element. */
@@ -598,7 +595,7 @@ arandom_r10 (gfc_array_r10 *x)
index_type stride0;
index_type dim;
GFC_REAL_10 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -621,7 +618,7 @@ arandom_r10 (gfc_array_r10 *x)
while (dest)
{
/* random_r10 (dest); */
- uint64_t r = xorshift1024star (rs);
+ uint64_t r = prng_next (rs);
rnumber_10 (dest, r);
/* Advance to the next element. */
@@ -668,7 +665,7 @@ arandom_r16 (gfc_array_r16 *x)
index_type stride0;
index_type dim;
GFC_REAL_16 *dest;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
dest = x->base_addr;
@@ -691,8 +688,8 @@ arandom_r16 (gfc_array_r16 *x)
while (dest)
{
/* random_r16 (dest); */
- uint64_t r1 = xorshift1024star (rs);
- uint64_t r2 = xorshift1024star (rs);
+ uint64_t r1 = prng_next (rs);
+ uint64_t r2 = prng_next (rs);
rnumber_16 (dest, r1, r2);
/* Advance to the next element. */
@@ -734,11 +731,7 @@ arandom_r16 (gfc_array_r16 *x)
static const uint64_t xor_keys[] = {
0xbd0c5b6e50c2df49ULL, 0xd46061cd46e1df38ULL, 0xbb4f4d4ed6103544ULL,
- 0x114a583d0756ad39ULL, 0x4b5ad8623d0aaab6ULL, 0x3f2ed7afbe0c0f21ULL,
- 0xdec83fd65f113445ULL, 0x3824f8fbc4f10d24ULL, 0x5d9025af05878911ULL,
- 0x500bc46b540340e9ULL, 0x8bd53298e0d00530ULL, 0x57886e40a952e06aULL,
- 0x926e76c88e31cdb6ULL, 0xbd0724dac0a3a5f9ULL, 0xc5c8981b858ab796ULL,
- 0xbb12ab2694c2b32cULL
+ 0x114a583d0756ad39ULL
};
@@ -768,9 +761,9 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
runtime_error ("RANDOM_SEED should have at most one argument present.");
if (size != NULL)
- *size = SZ + 1;
+ *size = SZ;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
/* Return the seed to GET data. */
if (get != NULL)
@@ -780,7 +773,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
runtime_error ("Array rank of GET is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ)
runtime_error ("Array size of GET is too small.");
if (!rs->init)
@@ -794,9 +787,6 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
memcpy (&(get->base_addr[(SZ - 1 - i) * GFC_DESCRIPTOR_STRIDE(get,0)]),
(unsigned char*) seed + i * sizeof(GFC_UINTEGER_4),
sizeof(GFC_UINTEGER_4));
-
- /* Finally copy the value of p after the seed. */
- get->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(get, 0)] = rs->p;
}
else
@@ -818,7 +808,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
runtime_error ("Array rank of PUT is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ)
runtime_error ("Array size of PUT is too small.");
/* We copy the seed given by the user. */
@@ -833,8 +823,6 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get)
njumps = 0;
master_init = true;
init_rand_state (rs, true);
-
- rs->p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
}
__gthread_mutex_unlock (&random_lock);
@@ -855,9 +843,9 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
#define SZ (sizeof (master_state) / sizeof (GFC_INTEGER_8))
if (size != NULL)
- *size = SZ + 1;
+ *size = SZ;
- xorshift1024star_state* rs = get_rand_state();
+ prng_state* rs = get_rand_state();
/* Return the seed to GET data. */
if (get != NULL)
@@ -867,7 +855,7 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
runtime_error ("Array rank of GET is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ)
runtime_error ("Array size of GET is too small.");
if (!rs->init)
@@ -880,8 +868,6 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
for (size_t i = 0; i < SZ; i++)
memcpy (&(get->base_addr[i * GFC_DESCRIPTOR_STRIDE(get,0)]), &seed[i],
sizeof (GFC_UINTEGER_8));
-
- get->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(get, 0)] = rs->p;
}
else
@@ -903,7 +889,7 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
runtime_error ("Array rank of PUT is not 1.");
/* If the array is too small, abort. */
- if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ + 1)
+ if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ)
runtime_error ("Array size of PUT is too small.");
/* This code now should do correct strides. */
@@ -915,7 +901,6 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get)
njumps = 0;
master_init = true;
init_rand_state (rs, true);
- rs->p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
}