diff options
author | Kostya Kortchinsky <kostyak@google.com> | 2017-04-20 18:07:17 +0000 |
---|---|---|
committer | Kostya Kortchinsky <kostyak@google.com> | 2017-04-20 18:07:17 +0000 |
commit | ba4bcb3734311880844d4802a46333de9040d996 (patch) | |
tree | f99e5bcb110e3f5b1a5a84e67d2f50bbc864efd3 /lib/scudo | |
parent | ac9e6ea23b96de782d512ffd7a0dd71e88c0798d (diff) |
[scudo] Remove GetActuallyAllocatedSize calls from the fast path
Summary:
GetActuallyAllocatedSize is actually expensive. In order to avoid calling this
function in the malloc/free fast path, we change the Scudo chunk header to
store the size of the chunk, if from the Primary, or the amount of unused
bytes if from the Secondary. This way, we only have to call the culprit
function for Secondary-backed allocations (and still in realloc).
The performance gain on a single-threaded pure malloc/free benchmark exercising
the Primary allocator is above 5%.
Reviewers: alekseyshl, kcc, dvyukov
Reviewed By: dvyukov
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D32299
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@300861 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/scudo')
-rw-r--r-- | lib/scudo/scudo_allocator.cpp | 59 | ||||
-rw-r--r-- | lib/scudo/scudo_allocator.h | 20 |
2 files changed, 49 insertions, 30 deletions
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp index 6bf2fa1e5..9812fc0f5 100644 --- a/lib/scudo/scudo_allocator.cpp +++ b/lib/scudo/scudo_allocator.cpp @@ -341,14 +341,14 @@ struct ScudoAllocator { dieWithMessage("ERROR: the maximum possible offset doesn't fit in the " "header\n"); } - // Verify that we can fit the maximum amount of unused bytes in the header. - // Given that the Secondary fits the allocation to a page, the worst case - // scenario happens in the Primary. It will depend on the second to last - // and last class sizes, as well as the dynamic base for the Primary. The - // following is an over-approximation that works for our needs. - uptr MaxUnusedBytes = SizeClassMap::kMaxSize - 1 - AlignedChunkHeaderSize; - Header.UnusedBytes = MaxUnusedBytes; - if (Header.UnusedBytes != MaxUnusedBytes) { + // Verify that we can fit the maximum size or amount of unused bytes in the + // header. Given that the Secondary fits the allocation to a page, the worst + // case scenario happens in the Primary. It will depend on the second to + // last and last class sizes, as well as the dynamic base for the Primary. + // The following is an over-approximation that works for our needs. + uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1; + Header.SizeOrUnusedBytes = MaxSizeOrUnusedBytes; + if (Header.SizeOrUnusedBytes != MaxSizeOrUnusedBytes) { dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in " "the header\n"); } @@ -428,11 +428,9 @@ struct ScudoAllocator { NeededSize -= Alignment; } - uptr ActuallyAllocatedSize = BackendAllocator.GetActuallyAllocatedSize( - reinterpret_cast<void *>(AllocBeg)); // If requested, we will zero out the entire contents of the returned chunk. 
if ((ForceZeroContents || ZeroContents) && FromPrimary) - memset(Ptr, 0, ActuallyAllocatedSize); + memset(Ptr, 0, BackendAllocator.GetActuallyAllocatedSize(Ptr)); uptr UserBeg = AllocBeg + AlignedChunkHeaderSize; if (!IsAligned(UserBeg, Alignment)) @@ -443,8 +441,18 @@ struct ScudoAllocator { uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg; Header.Offset = Offset >> MinAlignmentLog; Header.AllocType = Type; - Header.UnusedBytes = ActuallyAllocatedSize - Offset - - AlignedChunkHeaderSize - Size; + if (FromPrimary) { + Header.FromPrimary = FromPrimary; + Header.SizeOrUnusedBytes = Size; + } else { + // The secondary fits the allocations to a page, so the amount of unused + // bytes is the difference between the end of the user allocation and the + // next page boundary. + uptr PageSize = GetPageSizeCached(); + uptr TrailingBytes = (UserBeg + Size) & (PageSize - 1); + if (TrailingBytes) + Header.SizeOrUnusedBytes = PageSize - TrailingBytes; + } Header.Salt = static_cast<u8>(Prng.getNext()); getScudoChunk(UserBeg)->storeHeader(&Header); void *UserPtr = reinterpret_cast<void *>(UserBeg); @@ -482,8 +490,8 @@ struct ScudoAllocator { } } } - uptr UsableSize = Chunk->getUsableSize(&OldHeader); - uptr Size = UsableSize - OldHeader.UnusedBytes; + uptr Size = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes : + Chunk->getUsableSize(&OldHeader) - OldHeader.SizeOrUnusedBytes; if (DeleteSizeMismatch) { if (DeleteSize && DeleteSize != Size) { dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n", @@ -495,14 +503,19 @@ struct ScudoAllocator { NewHeader.State = ChunkQuarantine; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); + // If a small memory amount was allocated with a larger alignment, we want + // to take that into account. Otherwise the Quarantine would be filled with + // tiny chunks, taking a lot of VA memory. This an approximation of the + // usable size, that allows us to not call GetActuallyAllocatedSize. 
+ uptr LiableSize = Size + (OldHeader.Offset << MinAlignment); if (LIKELY(!ThreadTornDown)) { AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, UsableSize); + QuarantineCallback(&Cache), Chunk, LiableSize); } else { SpinMutexLock l(&FallbackMutex); AllocatorQuarantine.Put(&FallbackQuarantineCache, QuarantineCallback(&FallbackAllocatorCache), - Chunk, UsableSize); + Chunk, LiableSize); } } @@ -529,9 +542,12 @@ struct ScudoAllocator { } uptr UsableSize = Chunk->getUsableSize(&OldHeader); UnpackedHeader NewHeader = OldHeader; - // The new size still fits in the current chunk. - if (NewSize <= UsableSize) { - NewHeader.UnusedBytes = UsableSize - NewSize; + // The new size still fits in the current chunk, and the size difference + // is reasonable. + if (NewSize <= UsableSize && + (UsableSize - NewSize) < (SizeClassMap::kMaxSize / 2)) { + NewHeader.SizeOrUnusedBytes = + OldHeader.FromPrimary ? NewSize : UsableSize - NewSize; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); return OldPtr; } @@ -539,7 +555,8 @@ struct ScudoAllocator { // old one. void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc); if (NewPtr) { - uptr OldSize = UsableSize - OldHeader.UnusedBytes; + uptr OldSize = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes : + UsableSize - OldHeader.SizeOrUnusedBytes; memcpy(NewPtr, OldPtr, Min(NewSize, OldSize)); NewHeader.State = ChunkQuarantine; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h index 1696e1389..e7428f170 100644 --- a/lib/scudo/scudo_allocator.h +++ b/lib/scudo/scudo_allocator.h @@ -44,15 +44,17 @@ enum ChunkState : u8 { // well. The header will be atomically loaded and stored. typedef u64 PackedHeader; struct UnpackedHeader { - u64 Checksum : 16; - u64 UnusedBytes : 20; // Needed for reallocation purposes. 
- u64 State : 2; // available, allocated, or quarantined - u64 AllocType : 2; // malloc, new, new[], or memalign - u64 Offset : 16; // Offset from the beginning of the backend - // allocation to the beginning of the chunk itself, - // in multiples of MinAlignment. See comment about - // its maximum value and test in init(). - u64 Salt : 8; + u64 Checksum : 16; + u64 SizeOrUnusedBytes : 19; // Size for Primary backed allocations, amount of + // unused bytes in the chunk for Secondary ones. + u64 FromPrimary : 1; + u64 State : 2; // available, allocated, or quarantined + u64 AllocType : 2; // malloc, new, new[], or memalign + u64 Offset : 16; // Offset from the beginning of the backend + // allocation to the beginning of the chunk + // itself, in multiples of MinAlignment. See + /// comment about its maximum value and in init(). + u64 Salt : 8; }; typedef atomic_uint64_t AtomicPackedHeader; |