diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/xray/tests/unit/segmented_array_test.cc | 52 | ||||
-rw-r--r-- | lib/xray/xray_function_call_trie.h | 63 | ||||
-rw-r--r-- | lib/xray/xray_profile_collector.cc | 17 | ||||
-rw-r--r-- | lib/xray/xray_profiling.cc | 7 | ||||
-rw-r--r-- | lib/xray/xray_segmented_array.h | 308 |
5 files changed, 215 insertions, 232 deletions
diff --git a/lib/xray/tests/unit/segmented_array_test.cc b/lib/xray/tests/unit/segmented_array_test.cc index b48d481ff..035674ccf 100644 --- a/lib/xray/tests/unit/segmented_array_test.cc +++ b/lib/xray/tests/unit/segmented_array_test.cc @@ -15,16 +15,14 @@ struct TestData { TEST(SegmentedArrayTest, ConstructWithAllocators) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> Data(A, CA); + Array<TestData> Data(A); (void)Data; } TEST(SegmentedArrayTest, ConstructAndPopulate) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> data(A, CA); + Array<TestData> data(A); ASSERT_NE(data.Append(TestData{0, 0}), nullptr); ASSERT_NE(data.Append(TestData{1, 1}), nullptr); ASSERT_EQ(data.size(), 2u); @@ -33,8 +31,7 @@ TEST(SegmentedArrayTest, ConstructAndPopulate) { TEST(SegmentedArrayTest, ConstructPopulateAndLookup) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> data(A, CA); + Array<TestData> data(A); ASSERT_NE(data.Append(TestData{0, 1}), nullptr); ASSERT_EQ(data.size(), 1u); ASSERT_EQ(data[0].First, 0); @@ -44,8 +41,7 @@ TEST(SegmentedArrayTest, ConstructPopulateAndLookup) { TEST(SegmentedArrayTest, PopulateWithMoreElements) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 24); - ChunkAllocator CA(1 << 20); - Array<TestData> data(A, CA); + Array<TestData> data(A); static const auto kMaxElements = 100u; for (auto I = 0u; I < kMaxElements; ++I) { ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr); @@ -60,8 +56,7 @@ TEST(SegmentedArrayTest, PopulateWithMoreElements) { TEST(SegmentedArrayTest, AppendEmplace) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> data(A, CA); + Array<TestData> data(A); ASSERT_NE(data.AppendEmplace(1, 1), nullptr); ASSERT_EQ(data[0].First, 1); ASSERT_EQ(data[0].Second, 1); @@ -70,8 +65,7 @@ TEST(SegmentedArrayTest, AppendEmplace) { TEST(SegmentedArrayTest, AppendAndTrim) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> data(A, CA); + Array<TestData> data(A); ASSERT_NE(data.AppendEmplace(1, 1), nullptr); ASSERT_EQ(data.size(), 1u); data.trim(1); @@ -82,8 +76,7 @@ TEST(SegmentedArrayTest, AppendAndTrim) { TEST(SegmentedArrayTest, IteratorAdvance) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> data(A, CA); + Array<TestData> data(A); ASSERT_TRUE(data.empty()); ASSERT_EQ(data.begin(), data.end()); auto I0 = data.begin(); @@ -104,8 +97,7 @@ TEST(SegmentedArrayTest, IteratorAdvance) { TEST(SegmentedArrayTest, IteratorRetreat) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 4); - ChunkAllocator CA(1 << 4); - Array<TestData> data(A, CA); + Array<TestData> data(A); ASSERT_TRUE(data.empty()); ASSERT_EQ(data.begin(), data.end()); ASSERT_NE(data.AppendEmplace(1, 1), nullptr); @@ -126,17 +118,16 @@ TEST(SegmentedArrayTest, IteratorRetreat) { TEST(SegmentedArrayTest, IteratorTrimBehaviour) { using AllocatorType = typename Array<TestData>::AllocatorType; AllocatorType A(1 << 20); - ChunkAllocator CA(1 << 10); - Array<TestData> Data(A, CA); + Array<TestData> Data(A); ASSERT_TRUE(Data.empty()); auto I0Begin = Data.begin(), I0End = Data.end(); // Add enough elements in Data to have more than one chunk. - constexpr auto Chunk = Array<TestData>::ChunkSize; - constexpr auto ChunkX2 = Chunk * 2; - for (auto i = ChunkX2; i > 0u; --i) { + constexpr auto Segment = Array<TestData>::SegmentSize; + constexpr auto SegmentX2 = Segment * 2; + for (auto i = SegmentX2; i > 0u; --i) { Data.AppendEmplace(static_cast<s64>(i), static_cast<s64>(i)); } - ASSERT_EQ(Data.size(), ChunkX2); + ASSERT_EQ(Data.size(), SegmentX2); { auto &Back = Data.back(); ASSERT_EQ(Back.First, 1); @@ -144,18 +135,18 @@ TEST(SegmentedArrayTest, IteratorTrimBehaviour) { } // Trim one chunk's elements worth. - Data.trim(Chunk); - ASSERT_EQ(Data.size(), Chunk); + Data.trim(Segment); + ASSERT_EQ(Data.size(), Segment); // Check that we are still able to access 'back' properly. { auto &Back = Data.back(); - ASSERT_EQ(Back.First, static_cast<s64>(Chunk + 1)); - ASSERT_EQ(Back.Second, static_cast<s64>(Chunk + 1)); + ASSERT_EQ(Back.First, static_cast<s64>(Segment + 1)); + ASSERT_EQ(Back.Second, static_cast<s64>(Segment + 1)); } // Then trim until it's empty. - Data.trim(Chunk); + Data.trim(Segment); ASSERT_TRUE(Data.empty()); // Here our iterators should be the same. @@ -164,10 +155,10 @@ TEST(SegmentedArrayTest, IteratorTrimBehaviour) { EXPECT_EQ(I0End, I1End); // Then we ensure that adding elements back works just fine. - for (auto i = ChunkX2; i > 0u; --i) { + for (auto i = SegmentX2; i > 0u; --i) { Data.AppendEmplace(static_cast<s64>(i), static_cast<s64>(i)); } - EXPECT_EQ(Data.size(), ChunkX2); + EXPECT_EQ(Data.size(), SegmentX2); } struct ShadowStackEntry { @@ -179,8 +170,7 @@ struct ShadowStackEntry { TEST(SegmentedArrayTest, SimulateStackBehaviour) { using AllocatorType = typename Array<ShadowStackEntry>::AllocatorType; AllocatorType A(1 << 10); - ChunkAllocator CA(1 << 10); - Array<ShadowStackEntry> Data(A, CA); + Array<ShadowStackEntry> Data(A); static uint64_t Dummy = 0; constexpr uint64_t Max = 9; diff --git a/lib/xray/xray_function_call_trie.h b/lib/xray/xray_function_call_trie.h index e6e31cec2..2acf14aa5 100644 --- a/lib/xray/xray_function_call_trie.h +++ b/lib/xray/xray_function_call_trie.h @@ -119,9 +119,9 @@ public: // We add a constructor here to allow us to inplace-construct through // Array<...>'s AppendEmplace. - Node(Node *P, NodeIdPairAllocatorType &A, ChunkAllocator &CA, int64_t CC, - int64_t CLT, int32_t F) - : Parent(P), Callees(A, CA), CallCount(CC), CumulativeLocalTime(CLT), + Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT, + int32_t F) + : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT), FId(F) {} // TODO: Include the compact histogram. @@ -153,7 +153,6 @@ public: RootAllocatorType *RootAllocator = nullptr; ShadowStackAllocatorType *ShadowStackAllocator = nullptr; NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; - ChunkAllocator *ChunkAlloc = nullptr; Allocators() {} Allocators(const Allocators &) = delete; @@ -162,12 +161,11 @@ public: Allocators(Allocators &&O) : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator), ShadowStackAllocator(O.ShadowStackAllocator), - NodeIdPairAllocator(O.NodeIdPairAllocator), ChunkAlloc(O.ChunkAlloc) { + NodeIdPairAllocator(O.NodeIdPairAllocator) { O.NodeAllocator = nullptr; O.RootAllocator = nullptr; O.ShadowStackAllocator = nullptr; O.NodeIdPairAllocator = nullptr; - O.ChunkAlloc = nullptr; } Allocators &operator=(Allocators &&O) { @@ -191,11 +189,6 @@ public: O.NodeIdPairAllocator = this->NodeIdPairAllocator; this->NodeIdPairAllocator = Tmp; } - { - auto Tmp = O.ChunkAlloc; - O.ChunkAlloc = this->ChunkAlloc; - this->ChunkAlloc = Tmp; - } return *this; } @@ -223,11 +216,6 @@ public: InternalFree(NodeIdPairAllocator); NodeIdPairAllocator = nullptr; } - if (ChunkAlloc != nullptr) { - ChunkAlloc->~ChunkAllocator(); - InternalFree(ChunkAlloc); - ChunkAlloc = nullptr; - } } }; @@ -258,11 +246,6 @@ public: InternalAlloc(sizeof(NodeIdPairAllocatorType))); new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max); A.NodeIdPairAllocator = NodeIdPairAllocator; - - auto ChunkAlloc = reinterpret_cast<ChunkAllocator *>( - InternalAlloc(sizeof(ChunkAllocator))); - new (ChunkAlloc) ChunkAllocator(Max); - A.ChunkAlloc = ChunkAlloc; return A; } @@ -271,22 +254,20 @@ private: RootArray Roots; ShadowStackArray ShadowStack; NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; - ChunkAllocator *ChunkAlloc = nullptr; public: explicit FunctionCallTrie(const Allocators &A) - : Nodes(*A.NodeAllocator, *A.ChunkAlloc), - Roots(*A.RootAllocator, *A.ChunkAlloc), - ShadowStack(*A.ShadowStackAllocator, *A.ChunkAlloc), - NodeIdPairAllocator(A.NodeIdPairAllocator), ChunkAlloc(A.ChunkAlloc) {} + : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator), + ShadowStack(*A.ShadowStackAllocator), + NodeIdPairAllocator(A.NodeIdPairAllocator) {} void enterFunction(const int32_t FId, uint64_t TSC) { DCHECK_NE(FId, 0); // This function primarily deals with ensuring that the ShadowStack is // consistent and ready for when an exit event is encountered. if (UNLIKELY(ShadowStack.empty())) { - auto NewRoot = Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, - *ChunkAlloc, 0, 0, FId); + auto NewRoot = + Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId); if (UNLIKELY(NewRoot == nullptr)) return; Roots.Append(NewRoot); @@ -309,8 +290,8 @@ public: } // This means we've never seen this stack before, create a new node here. - auto NewNode = Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, - *ChunkAlloc, 0, 0, FId); + auto NewNode = + Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId); if (UNLIKELY(NewNode == nullptr)) return; DCHECK_NE(NewNode, nullptr); @@ -370,13 +351,12 @@ public: typename Stack::AllocatorType StackAllocator( profilingFlags()->stack_allocator_max); - ChunkAllocator StackChunkAllocator(profilingFlags()->stack_allocator_max); - Stack DFSStack(StackAllocator, StackChunkAllocator); + Stack DFSStack(StackAllocator); for (const auto Root : getRoots()) { // Add a node in O for this root. auto NewRoot = O.Nodes.AppendEmplace( - nullptr, *O.NodeIdPairAllocator, *O.ChunkAlloc, Root->CallCount, + nullptr, *O.NodeIdPairAllocator, Root->CallCount, Root->CumulativeLocalTime, Root->FId); // Because we cannot allocate more memory we should bail out right away. @@ -395,9 +375,8 @@ public: DFSStack.trim(1); for (const auto Callee : NP.Node->Callees) { auto NewNode = O.Nodes.AppendEmplace( - NP.NewNode, *O.NodeIdPairAllocator, *O.ChunkAlloc, - Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime, - Callee.FId); + NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount, + Callee.NodePtr->CumulativeLocalTime, Callee.FId); if (UNLIKELY(NewNode == nullptr)) return; NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId); @@ -423,16 +402,15 @@ public: using Stack = Array<NodeAndTarget>; typename Stack::AllocatorType StackAllocator( profilingFlags()->stack_allocator_max); - ChunkAllocator CA(profilingFlags()->stack_allocator_max); - Stack DFSStack(StackAllocator, CA); + Stack DFSStack(StackAllocator); for (const auto Root : getRoots()) { Node *TargetRoot = nullptr; auto R = O.Roots.find_element( [&](const Node *Node) { return Node->FId == Root->FId; }); if (R == nullptr) { - TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, - *O.ChunkAlloc, 0, 0, Root->FId); + TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0, + 0, Root->FId); if (UNLIKELY(TargetRoot == nullptr)) return; @@ -456,9 +434,8 @@ public: return C.FId == Callee.FId; }); if (TargetCallee == nullptr) { - auto NewTargetNode = - O.Nodes.AppendEmplace(NT.TargetNode, *O.NodeIdPairAllocator, - *O.ChunkAlloc, 0, 0, Callee.FId); + auto NewTargetNode = O.Nodes.AppendEmplace( + NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId); if (UNLIKELY(NewTargetNode == nullptr)) return; diff --git a/lib/xray/xray_profile_collector.cc b/lib/xray/xray_profile_collector.cc index ba347d13b..a43744d9a 100644 --- a/lib/xray/xray_profile_collector.cc +++ b/lib/xray/xray_profile_collector.cc @@ -118,12 +118,11 @@ struct ProfileRecord { const FunctionCallTrie::Node *Node = nullptr; // Constructor for in-place construction. - ProfileRecord(PathAllocator &A, ChunkAllocator &CA, - const FunctionCallTrie::Node *N) + ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N) : Path([&] { auto P = reinterpret_cast<PathArray *>(InternalAlloc(sizeof(PathArray))); - new (P) PathArray(A, CA); + new (P) PathArray(A); return P; }()), Node(N) {} @@ -137,18 +136,17 @@ using ProfileRecordArray = Array<ProfileRecord>; // the path(s) and the data associated with the path. static void populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, - ChunkAllocator &CA, const FunctionCallTrie &Trie) { + const FunctionCallTrie &Trie) { using StackArray = Array<const FunctionCallTrie::Node *>; using StackAllocator = typename StackArray::AllocatorType; StackAllocator StackAlloc(profilingFlags()->stack_allocator_max); - ChunkAllocator StackChunkAlloc(profilingFlags()->stack_allocator_max); - StackArray DFSStack(StackAlloc, StackChunkAlloc); + StackArray DFSStack(StackAlloc); for (const auto R : Trie.getRoots()) { DFSStack.Append(R); while (!DFSStack.empty()) { auto Node = DFSStack.back(); DFSStack.trim(1); - auto Record = PRs.AppendEmplace(PA, CA, Node); + auto Record = PRs.AppendEmplace(PA, Node); if (Record == nullptr) return; DCHECK_NE(Record, nullptr); @@ -214,10 +212,9 @@ void serialize() { for (u32 I = 0; I < ThreadTries->Size(); ++I) { using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType; ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max); - ChunkAllocator CA(profilingFlags()->global_allocator_max); ProfileRecord::PathAllocator PathAlloc( profilingFlags()->global_allocator_max); - ProfileRecordArray ProfileRecords(PRAlloc, CA); + ProfileRecordArray ProfileRecords(PRAlloc); // First, we want to compute the amount of space we're going to need. We'll // use a local allocator and an __xray::Array<...> to store the intermediary @@ -226,7 +223,7 @@ void serialize() { const auto &Trie = *(*ThreadTries)[I].Trie; if (Trie.getRoots().empty()) continue; - populateRecords(ProfileRecords, PathAlloc, CA, Trie); + populateRecords(ProfileRecords, PathAlloc, Trie); DCHECK(!Trie.getRoots().empty()); DCHECK(!ProfileRecords.empty()); diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc index 3a0c0b90e..a11fddcc2 100644 --- a/lib/xray/xray_profiling.cc +++ b/lib/xray/xray_profiling.cc @@ -57,12 +57,13 @@ struct alignas(64) ProfilingData { static pthread_key_t ProfilingKey; thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{}; - static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { - if (pthread_getspecific(ProfilingKey) == NULL) { + thread_local auto ThreadOnce = [] { new (&ThreadStorage) ProfilingData{}; pthread_setspecific(ProfilingKey, &ThreadStorage); - } + return false; + }(); + (void)ThreadOnce; auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage); diff --git a/lib/xray/xray_segmented_array.h b/lib/xray/xray_segmented_array.h index f001b230c..018c2aa9e 100644 --- a/lib/xray/xray_segmented_array.h +++ b/lib/xray/xray_segmented_array.h @@ -9,7 +9,7 @@ // // This file is a part of XRay, a dynamic runtime instrumentation system. // -// Defines the implementation of a segmented array, with fixed-size chunks +// Defines the implementation of a segmented array, with fixed-size segments // backing the segments. // //===----------------------------------------------------------------------===// @@ -24,113 +24,128 @@ #include <utility> namespace __xray { -struct Chunk { - void *Data; - Chunk *Prev; - Chunk *Next; -}; - -using ChunkAllocator = Allocator<next_pow2(sizeof(Chunk)) * 8>; /// The Array type provides an interface similar to std::vector<...> but does /// not shrink in size. Once constructed, elements can be appended but cannot be /// removed. The implementation is heavily dependent on the contract provided by /// the Allocator type, in that all memory will be released when the Allocator /// is destroyed. When an Array is destroyed, it will destroy elements in the -/// backing store but will not free the memory. The parameter N defines how many -/// elements of T there should be in a single block. -/// -/// We compute the least common multiple of the size of T and the cache line -/// size, to allow us to maximise the number of T objects we can place in -/// cache-line multiple sized blocks. To get back the number of T's, we divide -/// this least common multiple by the size of T. -template <class T, - size_t N = lcm(next_pow2(sizeof(T)), kCacheLineSize) / sizeof(T)> -struct Array { - static constexpr size_t ChunkSize = N; - static constexpr size_t ElementStorageSize = next_pow2(sizeof(T)); - static constexpr size_t AllocatorChunkSize = ElementStorageSize * ChunkSize; - using AllocatorType = Allocator<AllocatorChunkSize>; - static_assert(std::is_trivially_destructible<T>::value, - "T must be trivially destructible."); +/// backing store but will not free the memory. +template <class T> class Array { + struct SegmentBase { + SegmentBase *Prev; + SegmentBase *Next; + }; -private: - // TODO: Consider co-locating the chunk information with the data in the - // Block, as in an intrusive list -- i.e. putting the next and previous - // pointer values inside the Block storage. - static Chunk SentinelChunk; + // We want each segment of the array to be cache-line aligned, and elements of + // the array be offset from the beginning of the segment. + struct Segment : SegmentBase { + char Data[]; + }; + +public: + // Each segment of the array will be laid out with the following assumptions: + // + // - Each segment will be on a cache-line address boundary (kCacheLineSize + // aligned). + // + // - The elements will be accessed through an aligned pointer, dependent on + // the alignment of T. + // + // - Each element is at least two-pointers worth from the beginning of the + // Segment, aligned properly, and the rest of the elements are accessed + // through appropriate alignment. + // + // We then compute the size of the segment to follow this logic: + // + // - Compute the number of elements that can fit within + // kCacheLineSize-multiple segments, minus the size of two pointers. + // + // - Request cacheline-multiple sized elements from the allocator. + static constexpr size_t AlignedElementStorageSize = + sizeof(typename std::aligned_storage<sizeof(T), alignof(T)>::type); + + static constexpr size_t SegmentSize = + nearest_boundary(sizeof(Segment) + next_pow2(sizeof(T)), kCacheLineSize); + + using AllocatorType = Allocator<SegmentSize>; + + static constexpr size_t ElementsPerSegment = + (SegmentSize - sizeof(Segment)) / next_pow2(sizeof(T)); + + static_assert(ElementsPerSegment > 0, + "Must have at least 1 element per segment."); + + static SegmentBase SentinelSegment; +private: AllocatorType *Alloc; - ChunkAllocator *ChunkAlloc; - Chunk *Head = &SentinelChunk; - Chunk *Tail = &SentinelChunk; + SegmentBase *Head = &SentinelSegment; + SegmentBase *Tail = &SentinelSegment; size_t Size = 0; - // Here we keep track of chunks in the freelist, to allow us to re-use chunks - // when elements are trimmed off the end. - Chunk *Freelist = &SentinelChunk; + // Here we keep track of segments in the freelist, to allow us to re-use + // segments when elements are trimmed off the end. + SegmentBase *Freelist = &SentinelSegment; - Chunk *NewChunk() { + Segment *NewSegment() { // We need to handle the case in which enough elements have been trimmed to - // allow us to re-use chunks we've allocated before. For this we look into + // allow us to re-use segments we've allocated before. For this we look into // the Freelist, to see whether we need to actually allocate new blocks or // just re-use blocks we've already seen before. - if (Freelist != &SentinelChunk) { - auto *FreeChunk = Freelist; - Freelist = FreeChunk->Next; - FreeChunk->Next = &SentinelChunk; - Freelist->Prev = &SentinelChunk; - return FreeChunk; + if (Freelist != &SentinelSegment) { + auto *FreeSegment = Freelist; + Freelist = FreeSegment->Next; + FreeSegment->Next = &SentinelSegment; + Freelist->Prev = &SentinelSegment; + return static_cast<Segment *>(FreeSegment); } - auto Block = Alloc->Allocate(); - if (Block.Data == nullptr) - return nullptr; - - auto ChunkElement = ChunkAlloc->Allocate(); - if (ChunkElement.Data == nullptr) + auto SegmentBlock = Alloc->Allocate(); + if (SegmentBlock.Data == nullptr) return nullptr; - // Placement-new the Chunk element at the appropriate location. - auto C = reinterpret_cast<Chunk *>(ChunkElement.Data); - Chunk LocalC{Block.Data, &SentinelChunk, &SentinelChunk}; - internal_memcpy(C, &LocalC, sizeof(LocalC)); - return C; + // Placement-new the Segment element at the beginning of the SegmentBlock. + auto S = reinterpret_cast<Segment *>(SegmentBlock.Data); + new (S) SegmentBase{&SentinelSegment, &SentinelSegment}; + return S; } - Chunk *InitHeadAndTail() { - DCHECK_EQ(Head, &SentinelChunk); - DCHECK_EQ(Tail, &SentinelChunk); - auto Chunk = NewChunk(); - if (Chunk == nullptr) + Segment *InitHeadAndTail() { + DCHECK_EQ(Head, &SentinelSegment); + DCHECK_EQ(Tail, &SentinelSegment); + auto Segment = NewSegment(); + if (Segment == nullptr) return nullptr; - DCHECK_EQ(Chunk->Next, &SentinelChunk); - DCHECK_EQ(Chunk->Prev, &SentinelChunk); - return Head = Tail = Chunk; + DCHECK_EQ(Segment->Next, &SentinelSegment); + DCHECK_EQ(Segment->Prev, &SentinelSegment); + Head = Tail = static_cast<SegmentBase *>(Segment); + return Segment; } - Chunk *AppendNewChunk() { - auto Chunk = NewChunk(); - if (Chunk == nullptr) + Segment *AppendNewSegment() { + auto S = NewSegment(); + if (S == nullptr) return nullptr; - DCHECK_NE(Tail, &SentinelChunk); - DCHECK_EQ(Tail->Next, &SentinelChunk); - DCHECK_EQ(Chunk->Prev, &SentinelChunk); - DCHECK_EQ(Chunk->Next, &SentinelChunk); - Tail->Next = Chunk; - Chunk->Prev = Tail; - Tail = Chunk; - return Tail; + DCHECK_NE(Tail, &SentinelSegment); + DCHECK_EQ(Tail->Next, &SentinelSegment); + DCHECK_EQ(S->Prev, &SentinelSegment); + DCHECK_EQ(S->Next, &SentinelSegment); + Tail->Next = S; + S->Prev = Tail; + Tail = S; + return static_cast<Segment *>(Tail); } // This Iterator models a BidirectionalIterator. template <class U> class Iterator { - Chunk *C = &SentinelChunk; + SegmentBase *S = &SentinelSegment; size_t Offset = 0; size_t Size = 0; public: - Iterator(Chunk *IC, size_t Off, size_t S) : C(IC), Offset(Off), Size(S) {} + Iterator(SegmentBase *IS, size_t Off, size_t S) + : S(IS), Offset(Off), Size(S) {} Iterator(const Iterator &) noexcept = default; Iterator() noexcept = default; Iterator(Iterator &&) noexcept = default; @@ -139,28 +154,28 @@ private: ~Iterator() = default; Iterator &operator++() { - if (++Offset % N || Offset == Size) + if (++Offset % ElementsPerSegment || Offset == Size) return *this; // At this point, we know that Offset % N == 0, so we must advance the - // chunk pointer. - DCHECK_EQ(Offset % N, 0); + // segment pointer. + DCHECK_EQ(Offset % ElementsPerSegment, 0); DCHECK_NE(Offset, Size); - DCHECK_NE(C, &SentinelChunk); - DCHECK_NE(C->Next, &SentinelChunk); - C = C->Next; - DCHECK_NE(C, &SentinelChunk); + DCHECK_NE(S, &SentinelSegment); + DCHECK_NE(S->Next, &SentinelSegment); + S = S->Next; + DCHECK_NE(S, &SentinelSegment); return *this; } Iterator &operator--() { - DCHECK_NE(C, &SentinelChunk); + DCHECK_NE(S, &SentinelSegment); DCHECK_GT(Offset, 0); auto PreviousOffset = Offset--; - if (PreviousOffset != Size && PreviousOffset % N == 0) { - DCHECK_NE(C->Prev, &SentinelChunk); - C = C->Prev; + if (PreviousOffset != Size && PreviousOffset % ElementsPerSegment == 0) { + DCHECK_NE(S->Prev, &SentinelSegment); + S = S->Prev; } return *this; @@ -180,7 +195,7 @@ private: template <class V, class W> friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) { - return L.C == R.C && L.Offset == R.Offset; + return L.S == R.S && L.Offset == R.Offset; } template <class V, class W> @@ -189,31 +204,29 @@ private: } U &operator*() const { - DCHECK_NE(C, &SentinelChunk); - auto RelOff = Offset % N; - return *reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) + - (RelOff * ElementStorageSize)); + DCHECK_NE(S, &SentinelSegment); + auto RelOff = Offset % ElementsPerSegment; + + // We need to compute the character-aligned pointer, offset from the + // segment's Data location to get the element in the position of Offset. + auto Base = static_cast<Segment *>(S)->Data; + auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize); + return *reinterpret_cast<U *>(AlignedOffset); } - U *operator->() const { - DCHECK_NE(C, &SentinelChunk); - auto RelOff = Offset % N; - return reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) + - (RelOff * ElementStorageSize)); - } + U *operator->() const { return &(**this); } }; public: - explicit Array(AllocatorType &A, ChunkAllocator &CA) - : Alloc(&A), ChunkAlloc(&CA) {} + explicit Array(AllocatorType &A) : Alloc(&A) {} Array(const Array &) = delete; Array(Array &&O) NOEXCEPT : Alloc(O.Alloc), Head(O.Head), Tail(O.Tail), Size(O.Size) { - O.Head = &SentinelChunk; - O.Tail = &SentinelChunk; + O.Head = &SentinelSegment; + O.Tail = &SentinelSegment; O.Size = 0; } @@ -227,39 +240,41 @@ public: size_t size() const { return Size; } T *Append(const T &E) { - if (UNLIKELY(Head == &SentinelChunk)) + if (UNLIKELY(Head == &SentinelSegment)) if (InitHeadAndTail() == nullptr) return nullptr; - auto Offset = Size % N; + auto Offset = Size % ElementsPerSegment; if (UNLIKELY(Size != 0 && Offset == 0)) - if (AppendNewChunk() == nullptr) + if (AppendNewSegment() == nullptr) return nullptr; - auto Position = reinterpret_cast<T *>(reinterpret_cast<char *>(Tail->Data) + - (Offset * ElementStorageSize)); + auto Base = static_cast<Segment *>(Tail)->Data; + auto AlignedOffset = Base + (Offset * AlignedElementStorageSize); + auto Position = reinterpret_cast<T *>(AlignedOffset); *Position = E; ++Size; return Position; } template <class... Args> T *AppendEmplace(Args &&... args) { - if (UNLIKELY(Head == &SentinelChunk)) + if (UNLIKELY(Head == &SentinelSegment)) if (InitHeadAndTail() == nullptr) return nullptr; - auto Offset = Size % N; - auto *LatestChunk = Tail; + auto Offset = Size % ElementsPerSegment; + auto *LatestSegment = Tail; if (UNLIKELY(Size != 0 && Offset == 0)) { - LatestChunk = AppendNewChunk(); - if (LatestChunk == nullptr) + LatestSegment = AppendNewSegment(); + if (LatestSegment == nullptr) return nullptr; } - DCHECK_NE(Tail, &SentinelChunk); - auto Position = reinterpret_cast<char *>(LatestChunk->Data) + - (Offset * ElementStorageSize); - DCHECK_EQ(reinterpret_cast<uintptr_t>(Position) % ElementStorageSize, 0); + DCHECK_NE(Tail, &SentinelSegment); + auto Base = static_cast<Segment *>(LatestSegment)->Data; + auto AlignedOffset = Base + (Offset * AlignedElementStorageSize); + auto Position = reinterpret_cast<T *>(AlignedOffset); + // In-place construct at Position. new (Position) T{std::forward<Args>(args)...}; ++Size; @@ -269,24 +284,26 @@ public: T &operator[](size_t Offset) const { DCHECK_LE(Offset, Size); // We need to traverse the array enough times to find the element at Offset. - auto C = Head; - while (Offset >= N) { - C = C->Next; - Offset -= N; - DCHECK_NE(C, &SentinelChunk); + auto S = Head; + while (Offset >= ElementsPerSegment) { + S = S->Next; + Offset -= ElementsPerSegment; + DCHECK_NE(S, &SentinelSegment); } - return *reinterpret_cast<T *>(reinterpret_cast<char *>(C->Data) + - (Offset * ElementStorageSize)); + auto Base = static_cast<Segment *>(S)->Data; + auto AlignedOffset = Base + (Offset * AlignedElementStorageSize); + auto Position = reinterpret_cast<T *>(AlignedOffset); + return *reinterpret_cast<T *>(Position); } T &front() const { - DCHECK_NE(Head, &SentinelChunk); + DCHECK_NE(Head, &SentinelSegment); DCHECK_NE(Size, 0u); return *begin(); } T &back() const { - DCHECK_NE(Tail, &SentinelChunk); + DCHECK_NE(Tail, &SentinelSegment); DCHECK_NE(Size, 0u); auto It = end(); --It; @@ -313,28 +330,29 @@ public: auto OldSize = Size; Size -= Elements; - DCHECK_NE(Head, &SentinelChunk); - DCHECK_NE(Tail, &SentinelChunk); + DCHECK_NE(Head, &SentinelSegment); + DCHECK_NE(Tail, &SentinelSegment); - for (auto ChunksToTrim = - (nearest_boundary(OldSize, N) - nearest_boundary(Size, N)) / N; - ChunksToTrim > 0; --ChunksToTrim) { - DCHECK_NE(Head, &SentinelChunk); - DCHECK_NE(Tail, &SentinelChunk); + for (auto SegmentsToTrim = (nearest_boundary(OldSize, ElementsPerSegment) - + nearest_boundary(Size, ElementsPerSegment)) / + ElementsPerSegment; + SegmentsToTrim > 0; --SegmentsToTrim) { + DCHECK_NE(Head, &SentinelSegment); + DCHECK_NE(Tail, &SentinelSegment); // Put the tail into the Freelist. - auto *FreeChunk = Tail; + auto *FreeSegment = Tail; Tail = Tail->Prev; - if (Tail == &SentinelChunk) + if (Tail == &SentinelSegment) Head = Tail; else - Tail->Next = &SentinelChunk; - - DCHECK_EQ(Tail->Next, &SentinelChunk); - FreeChunk->Next = Freelist; - FreeChunk->Prev = &SentinelChunk; - if (Freelist != &SentinelChunk) - Freelist->Prev = FreeChunk; - Freelist = FreeChunk; + Tail->Next = &SentinelSegment; + + DCHECK_EQ(Tail->Next, &SentinelSegment); + FreeSegment->Next = Freelist; + FreeSegment->Prev = &SentinelSegment; + if (Freelist != &SentinelSegment) + Freelist->Prev = FreeSegment; + Freelist = FreeSegment; } } @@ -346,11 +364,11 @@ public: }; // We need to have this storage definition out-of-line so that the compiler can -// ensure that storage for the SentinelChunk is defined and has a single +// ensure that storage for the SentinelSegment is defined and has a single // address. -template <class T, size_t N> -Chunk Array<T, N>::SentinelChunk{nullptr, &Array<T, N>::SentinelChunk, - &Array<T, N>::SentinelChunk}; +template <class T> +typename Array<T>::SegmentBase Array<T>::SentinelSegment{ + &Array<T>::SentinelSegment, &Array<T>::SentinelSegment}; } // namespace __xray |