author    Kostya Kortchinsky <kostyak@google.com>    2017-07-12 15:29:08 +0000
committer Kostya Kortchinsky <kostyak@google.com>    2017-07-12 15:29:08 +0000
commit    e637b93bd999afb198d4a69e16c685026d016c30 (patch)
tree      461c3e16525f4b70f3a7b442671d8564d7f217cd /lib/scudo
parent    d16bb5ca26596aa7986e89129e93047dc6ad91a0 (diff)
[scudo] PRNG makeover
Summary:
This follows the addition of `GetRandom` with D34412. We remove our
`/dev/urandom` code and use the new function. Additionally, change the PRNG for
a slightly faster version. One of the issues with the old code is that we have
64 full bits of randomness per "next", using only 8 of those for the Salt and
discarding the rest. So we add a cached u64 in the PRNG that can serve up to
8 u8 before having to call the "next" function again.

During some integration work, I also realized that some very early processes
(like `init`) do not benefit from `/dev/urandom` yet. So if there is no
`getrandom` syscall either, we have to fall back to some sort of initialization
of the PRNG.

Now a few words on why XoRoShiRo and not something else. I have played a while
with various PRNGs on 32- & 64-bit platforms. Some results are below. LCG 32 &
64 are usually faster, but produce respectively only 15 & 31 bits of entropy,
meaning that to get a full 64-bit value you would need to call them several
times. The simple XorShift is fast and produces 32 bits, but is mediocre with
regard to PRNG test suites; PCG is slower overall; and XoRoShiRo is faster than
XorShift128+ and produces a full 64 bits.

%%%
root@tulip-chiphd:/data # ./randtest.arm
[+] starting xs32...
[?] xs32 duration: 22431833053ns
[+] starting lcg32...
[?] lcg32 duration: 14941402090ns
[+] starting pcg32...
[?] pcg32 duration: 44941973771ns
[+] starting xs128p...
[?] xs128p duration: 48889786981ns
[+] starting lcg64...
[?] lcg64 duration: 33831042391ns
[+] starting xos128p...
[?] xos128p duration: 44850878605ns
root@tulip-chiphd:/data # ./randtest.aarch64
[+] starting xs32...
[?] xs32 duration: 22425151678ns
[+] starting lcg32...
[?] lcg32 duration: 14954255257ns
[+] starting pcg32...
[?] pcg32 duration: 37346265726ns
[+] starting xs128p...
[?] xs128p duration: 22523807219ns
[+] starting lcg64...
[?] lcg64 duration: 26141304679ns
[+] starting xos128p...
[?] xos128p duration: 14937033215ns
%%%

Reviewers: alekseyshl

Reviewed By: alekseyshl

Subscribers: aemerson, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D35221

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@307798 91177308-0d34-0410-b5e6-96231b3b80d8
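Editor's note: for illustration only, here is a minimal standalone sketch of the
byte-caching idea described above. The names are hypothetical and a splitmix64
step stands in for the real engine; the actual Scudo code follows in the diff
below.

#include <cstdint>

// Serve u8 values out of a cached u64, so a single 64-bit "next" covers up
// to eight 8-bit Salt requests instead of one.
struct CachedPrngSketch {
  uint64_t State = 0x123456789abcdef0ULL;  // placeholder seed
  uint64_t CachedBytes = 0;
  uint8_t CachedBytesAvailable = 0;

  // splitmix64 step, used here only as a stand-in 64-bit source.
  uint64_t next() {
    uint64_t Z = (State += 0x9e3779b97f4a7c15ULL);
    Z = (Z ^ (Z >> 30)) * 0xbf58476d1ce4e5b9ULL;
    Z = (Z ^ (Z >> 27)) * 0x94d049bb133111ebULL;
    return Z ^ (Z >> 31);
  }

  uint8_t getU8() {
    if (CachedBytesAvailable == 0) {  // refill once every 8 bytes served
      CachedBytes = next();
      CachedBytesAvailable = sizeof(CachedBytes);
    }
    const uint8_t Result = static_cast<uint8_t>(CachedBytes & 0xff);
    CachedBytes >>= 8;
    CachedBytesAvailable--;
    return Result;
  }
};

With this scheme, eight consecutive getU8() calls cost one 64-bit "next"
rather than eight.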
Diffstat (limited to 'lib/scudo')
-rw-r--r--  lib/scudo/scudo_allocator.cpp | 16
-rw-r--r--  lib/scudo/scudo_tls.h         |  2
-rw-r--r--  lib/scudo/scudo_utils.cpp     | 36
-rw-r--r--  lib/scudo/scudo_utils.h       | 57
4 files changed, 55 insertions(+), 56 deletions(-)
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp
index 00fa19218..ec9132f90 100644
--- a/lib/scudo/scudo_allocator.cpp
+++ b/lib/scudo/scudo_allocator.cpp
@@ -264,7 +264,7 @@ ScudoQuarantineCache *getQuarantineCache(ScudoThreadContext *ThreadContext) {
ScudoQuarantineCache *>(ThreadContext->QuarantineCachePlaceHolder);
}
-Xorshift128Plus *getPrng(ScudoThreadContext *ThreadContext) {
+ScudoPrng *getPrng(ScudoThreadContext *ThreadContext) {
return &ThreadContext->Prng;
}
@@ -283,7 +283,7 @@ struct ScudoAllocator {
StaticSpinMutex FallbackMutex;
AllocatorCache FallbackAllocatorCache;
ScudoQuarantineCache FallbackQuarantineCache;
- Xorshift128Plus FallbackPrng;
+ ScudoPrng FallbackPrng;
bool DeallocationTypeMismatch;
bool ZeroContents;
@@ -333,8 +333,8 @@ struct ScudoAllocator {
static_cast<uptr>(Options.QuarantineSizeMb) << 20,
static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10);
BackendAllocator.InitCache(&FallbackAllocatorCache);
- FallbackPrng.initFromURandom();
- Cookie = FallbackPrng.getNext();
+ FallbackPrng.init();
+ Cookie = FallbackPrng.getU64();
}
// Helper function that checks for a valid Scudo chunk. nullptr isn't.
@@ -373,19 +373,19 @@ struct ScudoAllocator {
bool FromPrimary = PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment);
void *Ptr;
- uptr Salt;
+ u8 Salt;
uptr AllocationSize = FromPrimary ? AlignedSize : NeededSize;
uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment;
ScudoThreadContext *ThreadContext = getThreadContextAndLock();
if (LIKELY(ThreadContext)) {
- Salt = getPrng(ThreadContext)->getNext();
+ Salt = getPrng(ThreadContext)->getU8();
Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext),
AllocationSize, AllocationAlignment,
FromPrimary);
ThreadContext->unlock();
} else {
SpinMutexLock l(&FallbackMutex);
- Salt = FallbackPrng.getNext();
+ Salt = FallbackPrng.getU8();
Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, AllocationSize,
AllocationAlignment, FromPrimary);
}
@@ -612,7 +612,7 @@ static void initScudoInternal(const AllocatorOptions &Options) {
void ScudoThreadContext::init() {
getBackendAllocator().InitCache(&Cache);
- Prng.initFromURandom();
+ Prng.init();
memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
}
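Editor's note: the hunks above keep the existing two-tier layout: the Salt byte
comes from the thread-local PRNG when a thread context is available, and from a
global fallback PRNG under a mutex otherwise. A simplified sketch of that
pattern with standard C++ stand-ins (none of these are the sanitizer_common or
Scudo types):

#include <cstdint>
#include <mutex>

struct TinyPrng {  // stand-in for ScudoPrng
  uint64_t State = 1;
  uint8_t getU8() {
    State = State * 6364136223846793005ULL + 1442695040888963407ULL;
    return static_cast<uint8_t>(State >> 56);  // top byte of an LCG step
  }
};

thread_local TinyPrng *ThreadPrng = nullptr;  // set up during thread init
static TinyPrng FallbackPrng;
static std::mutex FallbackMutex;

uint8_t getSalt() {
  if (ThreadPrng)  // fast path: per-thread state, no locking
    return ThreadPrng->getU8();
  std::lock_guard<std::mutex> Lock(FallbackMutex);  // slow path
  return FallbackPrng.getU8();
}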
diff --git a/lib/scudo/scudo_tls.h b/lib/scudo/scudo_tls.h
index f6039bebe..20c49204c 100644
--- a/lib/scudo/scudo_tls.h
+++ b/lib/scudo/scudo_tls.h
@@ -30,7 +30,7 @@ namespace __scudo {
struct ALIGNED(64) ScudoThreadContext : public ScudoThreadContextPlatform {
AllocatorCache Cache;
- Xorshift128Plus Prng;
+ ScudoPrng Prng;
uptr QuarantineCachePlaceHolder[4];
void init();
void commitBack();
diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp
index 31c391946..f7903ff34 100644
--- a/lib/scudo/scudo_utils.cpp
+++ b/lib/scudo/scudo_utils.cpp
@@ -123,40 +123,4 @@ bool testCPUFeature(CPUFeature Feature) {
}
#endif // defined(__x86_64__) || defined(__i386__)
-// readRetry will attempt to read Count bytes from the Fd specified, and if
-// interrupted will retry to read additional bytes to reach Count.
-static ssize_t readRetry(int Fd, u8 *Buffer, size_t Count) {
- ssize_t AmountRead = 0;
- while (static_cast<size_t>(AmountRead) < Count) {
- ssize_t Result = read(Fd, Buffer + AmountRead, Count - AmountRead);
- if (Result > 0)
- AmountRead += Result;
- else if (!Result)
- break;
- else if (errno != EINTR) {
- AmountRead = -1;
- break;
- }
- }
- return AmountRead;
-}
-
-static void fillRandom(u8 *Data, ssize_t Size) {
- int Fd = open("/dev/urandom", O_RDONLY);
- if (Fd < 0) {
- dieWithMessage("ERROR: failed to open /dev/urandom.\n");
- }
- bool Success = readRetry(Fd, Data, Size) == Size;
- close(Fd);
- if (!Success) {
- dieWithMessage("ERROR: failed to read enough data from /dev/urandom.\n");
- }
-}
-
-// Seeds the xorshift state with /dev/urandom.
-// TODO(kostyak): investigate using getrandom() if available.
-void Xorshift128Plus::initFromURandom() {
- fillRandom(reinterpret_cast<u8 *>(State), sizeof(State));
-}
-
} // namespace __scudo
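Editor's note: the /dev/urandom plumbing removed above is superseded by the
`GetRandom` primitive added in D34412. As a rough sketch (not the
sanitizer_common implementation), a helper of that kind on Linux might try the
getrandom(2) syscall first, fall back to /dev/urandom, and report failure to
the caller instead of dying:

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

// Hypothetical helper: best-effort fill of Buffer with Length random bytes.
static bool getRandomBytes(void *Buffer, size_t Length) {
#if defined(SYS_getrandom)
  // getrandom(2) does not require the /dev/urandom device node to exist.
  long Got = syscall(SYS_getrandom, Buffer, Length, 0);
  if (Got == static_cast<long>(Length))
    return true;
#endif
  int Fd = open("/dev/urandom", O_RDONLY);
  if (Fd < 0)
    return false;
  size_t Total = 0;
  while (Total < Length) {
    ssize_t Result =
        read(Fd, static_cast<char *>(Buffer) + Total, Length - Total);
    if (Result > 0)
      Total += static_cast<size_t>(Result);
    else if (Result < 0 && errno == EINTR)
      continue;  // interrupted, retry
    else
      break;     // EOF or hard error
  }
  close(Fd);
  return Total == Length;
}

In the actual change, Scudo simply calls GetRandom() and, if it fails, seeds
the state from NanoTime(), as shown in scudo_utils.h below.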
diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h
index 7198476f4..6c6c9d893 100644
--- a/lib/scudo/scudo_utils.h
+++ b/lib/scudo/scudo_utils.h
@@ -36,23 +36,58 @@ enum CPUFeature {
};
bool testCPUFeature(CPUFeature feature);
-// Tiny PRNG based on https://en.wikipedia.org/wiki/Xorshift#xorshift.2B
-// The state (128 bits) will be stored in thread local storage.
-struct Xorshift128Plus {
+INLINE u64 rotl(const u64 X, int K) {
+ return (X << K) | (X >> (64 - K));
+}
+
+// XoRoShiRo128+ PRNG (http://xoroshiro.di.unimi.it/).
+struct XoRoShiRo128Plus {
public:
- void initFromURandom();
- u64 getNext() {
- u64 x = State[0];
- const u64 y = State[1];
- State[0] = y;
- x ^= x << 23;
- State[1] = x ^ y ^ (x >> 17) ^ (y >> 26);
- return State[1] + y;
+ void init() {
+ if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(State), sizeof(State)))) {
+ // Early processes (e.g. init) do not have /dev/urandom yet, but we still
+ // have to provide them with some degree of entropy. Not having a secure
+ // seed is not as problematic for them, as they are less likely to be
+ // the target of heap-based vulnerability exploitation attempts.
+ State[0] = NanoTime();
+ State[1] = 0;
+ }
+ fillCache();
}
+ u8 getU8() {
+ if (UNLIKELY(isCacheEmpty()))
+ fillCache();
+ const u8 Result = static_cast<u8>(CachedBytes & 0xff);
+ CachedBytes >>= 8;
+ CachedBytesAvailable--;
+ return Result;
+ }
+ u64 getU64() { return next(); }
+
private:
+ u8 CachedBytesAvailable;
+ u64 CachedBytes;
u64 State[2];
+ u64 next() {
+ const u64 S0 = State[0];
+ u64 S1 = State[1];
+ const u64 Result = S0 + S1;
+ S1 ^= S0;
+ State[0] = rotl(S0, 55) ^ S1 ^ (S1 << 14);
+ State[1] = rotl(S1, 36);
+ return Result;
+ }
+ bool isCacheEmpty() {
+ return CachedBytesAvailable == 0;
+ }
+ void fillCache() {
+ CachedBytes = next();
+ CachedBytesAvailable = sizeof(CachedBytes);
+ }
};
+typedef XoRoShiRo128Plus ScudoPrng;
+
} // namespace __scudo
#endif // SCUDO_UTILS_H_
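
Editor's note: for experimentation outside the sanitizer tree, here is a
self-contained transcription of the rotl/next() core added above, with
`u64`/`INLINE` replaced by `uint64_t`/`inline` and a fixed seed used instead of
`GetRandom()`/`NanoTime()` (for reproducibility only; the real code seeds from
the system in init()):

#include <cstdint>
#include <cstdio>

static inline uint64_t rotl64(const uint64_t X, int K) {
  return (X << K) | (X >> (64 - K));
}

// XoRoShiRo128+ core, transcribed from the struct above without the byte cache.
struct XoRoShiRo128PlusCore {
  uint64_t State[2];

  uint64_t next() {
    const uint64_t S0 = State[0];
    uint64_t S1 = State[1];
    const uint64_t Result = S0 + S1;
    S1 ^= S0;
    State[0] = rotl64(S0, 55) ^ S1 ^ (S1 << 14);
    State[1] = rotl64(S1, 36);
    return Result;
  }
};

int main() {
  XoRoShiRo128PlusCore Prng;
  Prng.State[0] = 0x0123456789abcdefULL;  // fixed seed for the sketch only
  Prng.State[1] = 0xfedcba9876543210ULL;
  for (int I = 0; I < 4; I++)
    printf("0x%016llx\n", static_cast<unsigned long long>(Prng.next()));
  return 0;
}

The rotation constants (55, 14, 36) match the ones in this patch, i.e. the
original 2016 xoroshiro128+ parameters.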