-rw-r--r--  lib/xray/tests/unit/allocator_test.cc              6
-rw-r--r--  lib/xray/tests/unit/function_call_trie_test.cc    11
-rw-r--r--  lib/xray/tests/unit/segmented_array_test.cc        57
-rw-r--r--  lib/xray/xray_allocator.h                         170
-rw-r--r--  lib/xray/xray_function_call_trie.h                 80
-rw-r--r--  lib/xray/xray_profile_collector.cc                 26
-rw-r--r--  lib/xray/xray_profiling.cc                          37
-rw-r--r--  lib/xray/xray_profiling_flags.inc                    2
-rw-r--r--  lib/xray/xray_segmented_array.h                     62
-rw-r--r--  lib/xray/xray_utils.h                               10
10 files changed, 245 insertions(+), 216 deletions(-)
diff --git a/lib/xray/tests/unit/allocator_test.cc b/lib/xray/tests/unit/allocator_test.cc
index 7636d296a..be404160e 100644
--- a/lib/xray/tests/unit/allocator_test.cc
+++ b/lib/xray/tests/unit/allocator_test.cc
@@ -22,16 +22,16 @@ struct TestData {
s64 Second;
};
-TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }
+TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11); }
TEST(AllocatorTest, Allocate) {
- Allocator<sizeof(TestData)> A(2 << 11, 0);
+ Allocator<sizeof(TestData)> A(2 << 11);
auto B = A.Allocate();
ASSERT_NE(B.Data, nullptr);
}
TEST(AllocatorTest, OverAllocate) {
- Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
+ Allocator<sizeof(TestData)> A(sizeof(TestData));
auto B1 = A.Allocate();
(void)B1;
auto B2 = A.Allocate();
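The Allocator constructor now takes only the maximum memory budget; the unused PreAllocate
parameter is gone. As a hedged completion of the truncated OverAllocate test above: with the
rewritten allocator (see xray_allocator.h below), Allocate() hands back a Block whose Data is
null once the budget is exhausted, roughly:

    Allocator<sizeof(TestData)> A(sizeof(TestData)); // budget fits exactly one block
    auto B1 = A.Allocate();
    ASSERT_NE(B1.Data, nullptr);  // first allocation succeeds
    auto B2 = A.Allocate();
    ASSERT_EQ(B2.Data, nullptr);  // over quota: null Data, no growth
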
diff --git a/lib/xray/tests/unit/function_call_trie_test.cc b/lib/xray/tests/unit/function_call_trie_test.cc
index 6ad51aa14..049ecfb07 100644
--- a/lib/xray/tests/unit/function_call_trie_test.cc
+++ b/lib/xray/tests/unit/function_call_trie_test.cc
@@ -19,8 +19,6 @@ namespace __xray {
namespace {
TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
- // FIXME: Support passing in configuration for allocators in the allocator
- // constructors.
profilingFlags()->setDefaults();
FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(Allocators);
@@ -47,6 +45,7 @@ TEST(FunctionCallTrieTest, EnterAndExitFunction) {
}
TEST(FunctionCallTrieTest, MissingFunctionEntry) {
+ profilingFlags()->setDefaults();
auto A = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(A);
Trie.exitFunction(1, 1);
@@ -56,6 +55,7 @@ TEST(FunctionCallTrieTest, MissingFunctionEntry) {
}
TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
+ profilingFlags()->setDefaults();
auto A = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(A);
Trie.enterFunction(2, 1);
@@ -63,16 +63,19 @@ TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
Trie.exitFunction(1, 5);
const auto &R = Trie.getRoots();
- ASSERT_TRUE(R.empty());
+ ASSERT_FALSE(R.empty());
+ EXPECT_EQ(R.size(), size_t{1});
}
TEST(FunctionCallTrieTest, MissingFunctionExit) {
+ profilingFlags()->setDefaults();
auto A = FunctionCallTrie::InitAllocators();
FunctionCallTrie Trie(A);
Trie.enterFunction(1, 1);
const auto &R = Trie.getRoots();
- ASSERT_TRUE(R.empty());
+ ASSERT_FALSE(R.empty());
+ EXPECT_EQ(R.size(), size_t{1});
}
TEST(FunctionCallTrieTest, MultipleRoots) {
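Each test now calls profilingFlags()->setDefaults() before InitAllocators(), because the
allocator sizes are read from the profiling flags rather than passed in explicitly. The flipped
assertions reflect that enterFunction() (see xray_function_call_trie.h below) appends a root
node as soon as a function is entered; a minimal sketch of the updated expectation:

    profilingFlags()->setDefaults();           // flags now size the allocators
    auto A = FunctionCallTrie::InitAllocators();
    FunctionCallTrie Trie(A);
    Trie.enterFunction(1, 1);                  // creates a root immediately
    ASSERT_FALSE(Trie.getRoots().empty());     // non-empty even with no matching exit
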
diff --git a/lib/xray/tests/unit/segmented_array_test.cc b/lib/xray/tests/unit/segmented_array_test.cc
index 539162d7d..b48d481ff 100644
--- a/lib/xray/tests/unit/segmented_array_test.cc
+++ b/lib/xray/tests/unit/segmented_array_test.cc
@@ -12,27 +12,29 @@ struct TestData {
TestData(s64 F, s64 S) : First(F), Second(S) {}
};
-TEST(SegmentedArrayTest, Construction) {
- Array<TestData> Data;
- (void)Data;
-}
-
-TEST(SegmentedArrayTest, ConstructWithAllocator) {
+TEST(SegmentedArrayTest, ConstructWithAllocators) {
using AllocatorType = typename Array<TestData>::AllocatorType;
- AllocatorType A(1 << 4, 0);
- Array<TestData> Data(A);
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> Data(A, CA);
(void)Data;
}
TEST(SegmentedArrayTest, ConstructAndPopulate) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> data(A, CA);
ASSERT_NE(data.Append(TestData{0, 0}), nullptr);
ASSERT_NE(data.Append(TestData{1, 1}), nullptr);
ASSERT_EQ(data.size(), 2u);
}
TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> data(A, CA);
ASSERT_NE(data.Append(TestData{0, 1}), nullptr);
ASSERT_EQ(data.size(), 1u);
ASSERT_EQ(data[0].First, 0);
@@ -40,7 +42,10 @@ TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
}
TEST(SegmentedArrayTest, PopulateWithMoreElements) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 24);
+ ChunkAllocator CA(1 << 20);
+ Array<TestData> data(A, CA);
static const auto kMaxElements = 100u;
for (auto I = 0u; I < kMaxElements; ++I) {
ASSERT_NE(data.Append(TestData{I, I + 1}), nullptr);
@@ -53,14 +58,20 @@ TEST(SegmentedArrayTest, PopulateWithMoreElements) {
}
TEST(SegmentedArrayTest, AppendEmplace) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> data(A, CA);
ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
ASSERT_EQ(data[0].First, 1);
ASSERT_EQ(data[0].Second, 1);
}
TEST(SegmentedArrayTest, AppendAndTrim) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> data(A, CA);
ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
ASSERT_EQ(data.size(), 1u);
data.trim(1);
@@ -69,7 +80,10 @@ TEST(SegmentedArrayTest, AppendAndTrim) {
}
TEST(SegmentedArrayTest, IteratorAdvance) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> data(A, CA);
ASSERT_TRUE(data.empty());
ASSERT_EQ(data.begin(), data.end());
auto I0 = data.begin();
@@ -88,7 +102,10 @@ TEST(SegmentedArrayTest, IteratorAdvance) {
}
TEST(SegmentedArrayTest, IteratorRetreat) {
- Array<TestData> data;
+ using AllocatorType = typename Array<TestData>::AllocatorType;
+ AllocatorType A(1 << 4);
+ ChunkAllocator CA(1 << 4);
+ Array<TestData> data(A, CA);
ASSERT_TRUE(data.empty());
ASSERT_EQ(data.begin(), data.end());
ASSERT_NE(data.AppendEmplace(1, 1), nullptr);
@@ -108,8 +125,9 @@ TEST(SegmentedArrayTest, IteratorRetreat) {
TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
using AllocatorType = typename Array<TestData>::AllocatorType;
- AllocatorType A(1 << 10, 0);
- Array<TestData> Data(A);
+ AllocatorType A(1 << 20);
+ ChunkAllocator CA(1 << 10);
+ Array<TestData> Data(A, CA);
ASSERT_TRUE(Data.empty());
auto I0Begin = Data.begin(), I0End = Data.end();
// Add enough elements in Data to have more than one chunk.
@@ -160,8 +178,9 @@ struct ShadowStackEntry {
TEST(SegmentedArrayTest, SimulateStackBehaviour) {
using AllocatorType = typename Array<ShadowStackEntry>::AllocatorType;
- AllocatorType A(1 << 10, 0);
- Array<ShadowStackEntry> Data(A);
+ AllocatorType A(1 << 10);
+ ChunkAllocator CA(1 << 10);
+ Array<ShadowStackEntry> Data(A, CA);
static uint64_t Dummy = 0;
constexpr uint64_t Max = 9;
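Every Array<T> in these tests is now constructed with two allocators: the element-block
allocator (Array<T>::AllocatorType) and a ChunkAllocator for the chunk headers. Both are taken
by reference and must outlive the Array. A condensed sketch of the new construction pattern
(the byte budgets are illustrative):

    using AllocatorType = typename Array<TestData>::AllocatorType;
    AllocatorType ElementAlloc(1 << 20);      // bytes reserved for element blocks
    ChunkAllocator HeaderAlloc(1 << 10);      // bytes reserved for Chunk headers
    Array<TestData> Data(ElementAlloc, HeaderAlloc);
    Data.AppendEmplace(1, 2);                 // in-place constructs {First=1, Second=2}
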
diff --git a/lib/xray/xray_allocator.h b/lib/xray/xray_allocator.h
index c05c0485c..5d0a62547 100644
--- a/lib/xray/xray_allocator.h
+++ b/lib/xray/xray_allocator.h
@@ -16,10 +16,11 @@
#ifndef XRAY_ALLOCATOR_H
#define XRAY_ALLOCATOR_H
-#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_posix.h"
+#include "sys/mman.h"
#include "xray_utils.h"
#include <cstddef>
#include <cstdint>
@@ -36,130 +37,87 @@ namespace __xray {
/// allocation function. N is used to compute the size of a block, which is
/// cache-line-size multiples worth of memory. We compute the size of a block by
/// determining how many cache lines worth of memory is required to subsume N.
+///
+/// The Allocator instance will manage its own memory acquired through mmap.
+/// This severely constrains the platforms on which this can be used to POSIX
+/// systems where mmap semantics are well-defined.
+///
+/// FIXME: Isolate the lower-level memory management to a different abstraction
+/// that can be platform-specific.
template <size_t N> struct Allocator {
// The Allocator returns memory as Block instances.
struct Block {
/// Compute the minimum cache-line size multiple that is >= N.
static constexpr auto Size = nearest_boundary(N, kCacheLineSize);
- void *Data = nullptr;
+ void *Data;
};
private:
- // A BlockLink will contain a fixed number of blocks, each with an identifier
- // to specify whether it's been handed out or not. We keep track of BlockLink
- // iterators, which are basically a pointer to the link and an offset into
- // the fixed set of blocks associated with a link. The iterators are
- // bidirectional.
- //
- // We're calling it a "link" in the context of seeing these as a chain of
- // block pointer containers (i.e. links in a chain).
- struct BlockLink {
- static_assert(kCacheLineSize % sizeof(void *) == 0,
- "Cache line size is not divisible by size of void*; none of "
- "the assumptions of the BlockLink will hold.");
-
- // We compute the number of pointers to areas in memory where we consider as
- // individual blocks we've allocated. To ensure that instances of the
- // BlockLink object are cache-line sized, we deduct two pointers worth
- // representing the pointer to the previous link and the backing store for
- // the whole block.
- //
- // This structure corresponds to the following layout:
- //
- // Blocks [ 0, 1, 2, .., BlockPtrCount - 2]
- //
- static constexpr auto BlockPtrCount =
- (kCacheLineSize / sizeof(Block *)) - 2;
-
- BlockLink() {
- // Zero out Blocks.
- // FIXME: Use a braced member initializer when we drop support for GCC
- // 4.8.
- internal_memset(Blocks, 0, sizeof(Blocks));
- }
-
- // FIXME: Align this to cache-line address boundaries?
- Block Blocks[BlockPtrCount];
- BlockLink *Prev = nullptr;
- void *BackingStore = nullptr;
- };
-
- static_assert(sizeof(BlockLink) == kCacheLineSize,
- "BlockLink instances must be cache-line-sized.");
+ const size_t MaxMemory{0};
+ void *BackingStore = nullptr;
+ void *AlignedNextBlock = nullptr;
+ size_t AllocatedBlocks = 0;
+ SpinMutex Mutex{};
- static BlockLink NullLink;
+ void *Alloc() {
+ SpinMutexLock Lock(&Mutex);
+ if (UNLIKELY(BackingStore == nullptr)) {
+ BackingStore = reinterpret_cast<void *>(
+ internal_mmap(NULL, MaxMemory, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, 0, 0));
+ if (BackingStore == MAP_FAILED) {
+ BackingStore = nullptr;
+ if (Verbosity())
+ Report("XRay Profiling: Failed to allocate memory for allocator.\n");
+ return nullptr;
+ }
+
+ AlignedNextBlock = BackingStore;
+
+ // Ensure that NextBlock is aligned appropriately.
+ auto BackingStoreNum = reinterpret_cast<uintptr_t>(BackingStore);
+ auto AlignedNextBlockNum = nearest_boundary(
+ reinterpret_cast<uintptr_t>(AlignedNextBlock), kCacheLineSize);
+ if (diff(AlignedNextBlockNum, BackingStoreNum) > ptrdiff_t(MaxMemory)) {
+ munmap(BackingStore, MaxMemory);
+ AlignedNextBlock = BackingStore = nullptr;
+ if (Verbosity())
+ Report("XRay Profiling: Cannot obtain enough memory from "
+ "preallocated region.\n");
+ return nullptr;
+ }
+
+ AlignedNextBlock = reinterpret_cast<void *>(AlignedNextBlockNum);
+
+ // Assert that AlignedNextBlock is cache-line aligned.
+ DCHECK_EQ(reinterpret_cast<uintptr_t>(AlignedNextBlock) % kCacheLineSize,
+ 0);
+ }
- // FIXME: Implement a freelist, in case we actually do intend to return memory
- // to the allocator, as opposed to just de-allocating everything in one go?
+ if ((AllocatedBlocks * Block::Size) >= MaxMemory)
+ return nullptr;
- size_t MaxMemory;
- SpinMutex Mutex{};
- BlockLink *Tail = &NullLink;
- size_t Counter = 0;
-
- BlockLink *NewChainLink(uint64_t Alignment) {
- auto NewChain = reinterpret_cast<BlockLink *>(
- InternalAlloc(sizeof(BlockLink), nullptr, kCacheLineSize));
- auto BackingStore = reinterpret_cast<char *>(InternalAlloc(
- (BlockLink::BlockPtrCount + 1) * Block::Size, nullptr, Alignment));
- size_t Offset = 0;
- DCHECK_NE(NewChain, nullptr);
- DCHECK_NE(BackingStore, nullptr);
- NewChain->BackingStore = BackingStore;
-
- // Here we ensure that the alignment of the pointers we're handing out
- // adhere to the alignment requirements of the call to Allocate().
- for (auto &B : NewChain->Blocks) {
- auto AlignmentAdjustment =
- nearest_boundary(reinterpret_cast<uintptr_t>(BackingStore + Offset),
- Alignment) -
- reinterpret_cast<uintptr_t>(BackingStore + Offset);
- B.Data = BackingStore + AlignmentAdjustment + Offset;
- DCHECK_EQ(reinterpret_cast<uintptr_t>(B.Data) % Alignment, 0);
- Offset += AlignmentAdjustment + Block::Size;
- }
- NewChain->Prev = Tail;
- return NewChain;
+ // Align the pointer we'd like to return to an appropriate alignment, then
+ // advance the pointer from where to start allocations.
+ void *Result = AlignedNextBlock;
+ AlignedNextBlock = reinterpret_cast<void *>(
+ reinterpret_cast<char *>(AlignedNextBlock) + N);
+ ++AllocatedBlocks;
+ return Result;
}
public:
- Allocator(size_t M, size_t PreAllocate) : MaxMemory(M) {
- // FIXME: Implement PreAllocate support!
- }
-
- Block Allocate(uint64_t Alignment = 8) {
- SpinMutexLock Lock(&Mutex);
- // Check whether we're over quota.
- if (Counter * Block::Size >= MaxMemory)
- return {};
+ explicit Allocator(size_t M)
+ : MaxMemory(nearest_boundary(M, kCacheLineSize)) {}
- size_t ChainOffset = Counter % BlockLink::BlockPtrCount;
-
- Block B{};
- BlockLink *Link = Tail;
- if (UNLIKELY(Counter == 0 || ChainOffset == 0))
- Tail = Link = NewChainLink(Alignment);
-
- B = Link->Blocks[ChainOffset];
- ++Counter;
- return B;
- }
+ Block Allocate() { return {Alloc()}; }
~Allocator() NOEXCEPT {
- // We need to deallocate all the blocks, including the chain links.
- for (auto *C = Tail; C != &NullLink;) {
- // We know that the data block is a large contiguous page, we deallocate
- // that at once.
- InternalFree(C->BackingStore);
- auto Prev = C->Prev;
- InternalFree(C);
- C = Prev;
+ if (BackingStore != nullptr) {
+ internal_munmap(BackingStore, MaxMemory);
}
}
-}; // namespace __xray
-
-// Storage for the NullLink sentinel.
-template <size_t N> typename Allocator<N>::BlockLink Allocator<N>::NullLink;
+};
} // namespace __xray
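The rewritten Allocator is a lock-protected bump allocator over a single anonymous mmap region:
the region is mapped lazily on the first Allocate(), the bump pointer is rounded up to a
cache-line boundary, fixed-size blocks are handed out until MaxMemory is reached, and the whole
region is unmapped in the destructor. A standalone sketch of the same scheme, assuming plain
POSIX mmap and std::mutex in place of the sanitizer_common internal_mmap and SpinMutex used
above:

    #include <sys/mman.h>
    #include <cstddef>
    #include <mutex>

    template <std::size_t N> class BumpAllocator {
      static constexpr std::size_t kCacheLine = 64;
      static constexpr std::size_t BlockSize =
          ((N + kCacheLine - 1) / kCacheLine) * kCacheLine;  // round N up to a line
      std::size_t MaxMemory;
      char *Base = nullptr;
      char *Next = nullptr;
      std::size_t Allocated = 0;
      std::mutex Mu;

    public:
      explicit BumpAllocator(std::size_t M)
          // Round the budget up to whole blocks for simplicity; the real code
          // rounds to the cache line instead.
          : MaxMemory(((M + BlockSize - 1) / BlockSize) * BlockSize) {}

      void *Allocate() {
        std::lock_guard<std::mutex> Lock(Mu);
        if (Base == nullptr) {
          // Lazily reserve the whole budget; the real code also passes
          // MAP_NORESERVE and reports failures through Report().
          void *P = mmap(nullptr, MaxMemory, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
          if (P == MAP_FAILED)
            return nullptr;
          Base = Next = static_cast<char *>(P);  // mmap results are page-aligned
        }
        if (Allocated * BlockSize >= MaxMemory)
          return nullptr;                        // over quota: refuse, never grow
        void *Result = Next;
        Next += BlockSize;
        ++Allocated;
        return Result;
      }

      ~BumpAllocator() {
        if (Base != nullptr)
          munmap(Base, MaxMemory);
      }
    };
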
diff --git a/lib/xray/xray_function_call_trie.h b/lib/xray/xray_function_call_trie.h
index 6c4710584..e6e31cec2 100644
--- a/lib/xray/xray_function_call_trie.h
+++ b/lib/xray/xray_function_call_trie.h
@@ -15,6 +15,7 @@
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H
+#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <memory> // For placement new.
@@ -118,9 +119,9 @@ public:
// We add a constructor here to allow us to inplace-construct through
// Array<...>'s AppendEmplace.
- Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
- int32_t F)
- : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
+ Node(Node *P, NodeIdPairAllocatorType &A, ChunkAllocator &CA, int64_t CC,
+ int64_t CLT, int32_t F)
+ : Parent(P), Callees(A, CA), CallCount(CC), CumulativeLocalTime(CLT),
FId(F) {}
// TODO: Include the compact histogram.
@@ -152,6 +153,7 @@ public:
RootAllocatorType *RootAllocator = nullptr;
ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+ ChunkAllocator *ChunkAlloc = nullptr;
Allocators() {}
Allocators(const Allocators &) = delete;
@@ -160,11 +162,12 @@ public:
Allocators(Allocators &&O)
: NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
ShadowStackAllocator(O.ShadowStackAllocator),
- NodeIdPairAllocator(O.NodeIdPairAllocator) {
+ NodeIdPairAllocator(O.NodeIdPairAllocator), ChunkAlloc(O.ChunkAlloc) {
O.NodeAllocator = nullptr;
O.RootAllocator = nullptr;
O.ShadowStackAllocator = nullptr;
O.NodeIdPairAllocator = nullptr;
+ O.ChunkAlloc = nullptr;
}
Allocators &operator=(Allocators &&O) {
@@ -188,6 +191,11 @@ public:
O.NodeIdPairAllocator = this->NodeIdPairAllocator;
this->NodeIdPairAllocator = Tmp;
}
+ {
+ auto Tmp = O.ChunkAlloc;
+ O.ChunkAlloc = this->ChunkAlloc;
+ this->ChunkAlloc = Tmp;
+ }
return *this;
}
@@ -198,18 +206,27 @@ public:
if (NodeAllocator != nullptr) {
NodeAllocator->~NodeAllocatorType();
InternalFree(NodeAllocator);
+ NodeAllocator = nullptr;
}
if (RootAllocator != nullptr) {
RootAllocator->~RootAllocatorType();
InternalFree(RootAllocator);
+ RootAllocator = nullptr;
}
if (ShadowStackAllocator != nullptr) {
ShadowStackAllocator->~ShadowStackAllocatorType();
InternalFree(ShadowStackAllocator);
+ ShadowStackAllocator = nullptr;
}
if (NodeIdPairAllocator != nullptr) {
NodeIdPairAllocator->~NodeIdPairAllocatorType();
InternalFree(NodeIdPairAllocator);
+ NodeIdPairAllocator = nullptr;
+ }
+ if (ChunkAlloc != nullptr) {
+ ChunkAlloc->~ChunkAllocator();
+ InternalFree(ChunkAlloc);
+ ChunkAlloc = nullptr;
}
}
};
@@ -223,24 +240,29 @@ public:
Allocators A;
auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
- new (NodeAllocator) Allocators::NodeAllocatorType(Max, 0);
+ new (NodeAllocator) Allocators::NodeAllocatorType(Max);
A.NodeAllocator = NodeAllocator;
auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
InternalAlloc(sizeof(Allocators::RootAllocatorType)));
- new (RootAllocator) Allocators::RootAllocatorType(Max, 0);
+ new (RootAllocator) Allocators::RootAllocatorType(Max);
A.RootAllocator = RootAllocator;
auto ShadowStackAllocator =
reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
- new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max, 0);
+ new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
A.ShadowStackAllocator = ShadowStackAllocator;
auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
InternalAlloc(sizeof(NodeIdPairAllocatorType)));
- new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max, 0);
+ new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
A.NodeIdPairAllocator = NodeIdPairAllocator;
+
+ auto ChunkAlloc = reinterpret_cast<ChunkAllocator *>(
+ InternalAlloc(sizeof(ChunkAllocator)));
+ new (ChunkAlloc) ChunkAllocator(Max);
+ A.ChunkAlloc = ChunkAlloc;
return A;
}
@@ -249,20 +271,22 @@ private:
RootArray Roots;
ShadowStackArray ShadowStack;
NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+ ChunkAllocator *ChunkAlloc = nullptr;
public:
explicit FunctionCallTrie(const Allocators &A)
- : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
- ShadowStack(*A.ShadowStackAllocator),
- NodeIdPairAllocator(A.NodeIdPairAllocator) {}
+ : Nodes(*A.NodeAllocator, *A.ChunkAlloc),
+ Roots(*A.RootAllocator, *A.ChunkAlloc),
+ ShadowStack(*A.ShadowStackAllocator, *A.ChunkAlloc),
+ NodeIdPairAllocator(A.NodeIdPairAllocator), ChunkAlloc(A.ChunkAlloc) {}
void enterFunction(const int32_t FId, uint64_t TSC) {
DCHECK_NE(FId, 0);
// This function primarily deals with ensuring that the ShadowStack is
// consistent and ready for when an exit event is encountered.
if (UNLIKELY(ShadowStack.empty())) {
- auto NewRoot =
- Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
+ auto NewRoot = Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator,
+ *ChunkAlloc, 0, 0, FId);
if (UNLIKELY(NewRoot == nullptr))
return;
Roots.Append(NewRoot);
@@ -285,8 +309,8 @@ public:
}
// This means we've never seen this stack before, create a new node here.
- auto NewNode =
- Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
+ auto NewNode = Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator,
+ *ChunkAlloc, 0, 0, FId);
if (UNLIKELY(NewNode == nullptr))
return;
DCHECK_NE(NewNode, nullptr);
@@ -345,13 +369,14 @@ public:
using Stack = Array<NodeAndParent>;
typename Stack::AllocatorType StackAllocator(
- profilingFlags()->stack_allocator_max, 0);
- Stack DFSStack(StackAllocator);
+ profilingFlags()->stack_allocator_max);
+ ChunkAllocator StackChunkAllocator(profilingFlags()->stack_allocator_max);
+ Stack DFSStack(StackAllocator, StackChunkAllocator);
for (const auto Root : getRoots()) {
// Add a node in O for this root.
auto NewRoot = O.Nodes.AppendEmplace(
- nullptr, *O.NodeIdPairAllocator, Root->CallCount,
+ nullptr, *O.NodeIdPairAllocator, *O.ChunkAlloc, Root->CallCount,
Root->CumulativeLocalTime, Root->FId);
// Because we cannot allocate more memory we should bail out right away.
@@ -370,8 +395,9 @@ public:
DFSStack.trim(1);
for (const auto Callee : NP.Node->Callees) {
auto NewNode = O.Nodes.AppendEmplace(
- NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
- Callee.NodePtr->CumulativeLocalTime, Callee.FId);
+ NP.NewNode, *O.NodeIdPairAllocator, *O.ChunkAlloc,
+ Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
+ Callee.FId);
if (UNLIKELY(NewNode == nullptr))
return;
NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
@@ -396,16 +422,17 @@ public:
};
using Stack = Array<NodeAndTarget>;
typename Stack::AllocatorType StackAllocator(
- profilingFlags()->stack_allocator_max, 0);
- Stack DFSStack(StackAllocator);
+ profilingFlags()->stack_allocator_max);
+ ChunkAllocator CA(profilingFlags()->stack_allocator_max);
+ Stack DFSStack(StackAllocator, CA);
for (const auto Root : getRoots()) {
Node *TargetRoot = nullptr;
auto R = O.Roots.find_element(
[&](const Node *Node) { return Node->FId == Root->FId; });
if (R == nullptr) {
- TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
- 0, Root->FId);
+ TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator,
+ *O.ChunkAlloc, 0, 0, Root->FId);
if (UNLIKELY(TargetRoot == nullptr))
return;
@@ -429,8 +456,9 @@ public:
return C.FId == Callee.FId;
});
if (TargetCallee == nullptr) {
- auto NewTargetNode = O.Nodes.AppendEmplace(
- NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
+ auto NewTargetNode =
+ O.Nodes.AppendEmplace(NT.TargetNode, *O.NodeIdPairAllocator,
+ *O.ChunkAlloc, 0, 0, Callee.FId);
if (UNLIKELY(NewTargetNode == nullptr))
return;
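Allocators::Reset() (and the destructor) now null out each pointer after the explicit
destructor call and InternalFree, and InitAllocators() follows the same raw-alloc plus
placement-new pattern for the new ChunkAllocator member. A self-contained sketch of that
pattern, with malloc/free standing in for InternalAlloc/InternalFree:

    #include <cstddef>
    #include <cstdlib>
    #include <new>

    struct Widget {
      explicit Widget(std::size_t Max) : Limit(Max) {}
      std::size_t Limit;
    };

    Widget *createWidget(std::size_t Max) {
      auto *W = static_cast<Widget *>(std::malloc(sizeof(Widget)));
      if (W == nullptr)
        return nullptr;
      new (W) Widget(Max);   // placement-new into the raw storage
      return W;
    }

    void destroyWidget(Widget *&W) {
      if (W == nullptr)
        return;
      W->~Widget();          // run the destructor explicitly
      std::free(W);
      W = nullptr;           // nulling makes a second cleanup call harmless
    }
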
diff --git a/lib/xray/xray_profile_collector.cc b/lib/xray/xray_profile_collector.cc
index 972649193..ba347d13b 100644
--- a/lib/xray/xray_profile_collector.cc
+++ b/lib/xray/xray_profile_collector.cc
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
#include "xray_profile_collector.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_vector.h"
#include "xray_profiling_flags.h"
@@ -117,11 +118,12 @@ struct ProfileRecord {
const FunctionCallTrie::Node *Node = nullptr;
// Constructor for in-place construction.
- ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
+ ProfileRecord(PathAllocator &A, ChunkAllocator &CA,
+ const FunctionCallTrie::Node *N)
: Path([&] {
auto P =
reinterpret_cast<PathArray *>(InternalAlloc(sizeof(PathArray)));
- new (P) PathArray(A);
+ new (P) PathArray(A, CA);
return P;
}()),
Node(N) {}
@@ -135,17 +137,20 @@ using ProfileRecordArray = Array<ProfileRecord>;
// the path(s) and the data associated with the path.
static void populateRecords(ProfileRecordArray &PRs,
ProfileRecord::PathAllocator &PA,
- const FunctionCallTrie &Trie) {
+ ChunkAllocator &CA, const FunctionCallTrie &Trie) {
using StackArray = Array<const FunctionCallTrie::Node *>;
using StackAllocator = typename StackArray::AllocatorType;
- StackAllocator StackAlloc(profilingFlags()->stack_allocator_max, 0);
- StackArray DFSStack(StackAlloc);
+ StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
+ ChunkAllocator StackChunkAlloc(profilingFlags()->stack_allocator_max);
+ StackArray DFSStack(StackAlloc, StackChunkAlloc);
for (const auto R : Trie.getRoots()) {
DFSStack.Append(R);
while (!DFSStack.empty()) {
auto Node = DFSStack.back();
DFSStack.trim(1);
- auto Record = PRs.AppendEmplace(PA, Node);
+ auto Record = PRs.AppendEmplace(PA, CA, Node);
+ if (Record == nullptr)
+ return;
DCHECK_NE(Record, nullptr);
// Traverse the Node's parents and as we're doing so, get the FIds in
@@ -208,10 +213,11 @@ void serialize() {
// Then repopulate the global ProfileBuffers.
for (u32 I = 0; I < ThreadTries->Size(); ++I) {
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
- ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
+ ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max);
+ ChunkAllocator CA(profilingFlags()->global_allocator_max);
ProfileRecord::PathAllocator PathAlloc(
- profilingFlags()->global_allocator_max, 0);
- ProfileRecordArray ProfileRecords(PRAlloc);
+ profilingFlags()->global_allocator_max);
+ ProfileRecordArray ProfileRecords(PRAlloc, CA);
// First, we want to compute the amount of space we're going to need. We'll
// use a local allocator and an __xray::Array<...> to store the intermediary
@@ -220,7 +226,7 @@ void serialize() {
const auto &Trie = *(*ThreadTries)[I].Trie;
if (Trie.getRoots().empty())
continue;
- populateRecords(ProfileRecords, PathAlloc, Trie);
+ populateRecords(ProfileRecords, PathAlloc, CA, Trie);
DCHECK(!Trie.getRoots().empty());
DCHECK(!ProfileRecords.empty());
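populateRecords() now receives the ChunkAllocator explicitly and bails out as soon as
AppendEmplace() returns nullptr, rather than relying on the DCHECK alone. The traversal itself
is a plain explicit-stack DFS over the trie; a generic standalone sketch, with std::vector
standing in for the __xray::Array-backed DFSStack:

    #include <vector>

    struct TrieNode {
      std::vector<TrieNode *> Callees;
    };

    template <class Visit>  // Visit returns false to abort, mirroring the early return
    void visitTrie(const std::vector<TrieNode *> &Roots, Visit V) {
      std::vector<TrieNode *> Stack;
      for (TrieNode *Root : Roots) {
        Stack.push_back(Root);
        while (!Stack.empty()) {
          TrieNode *N = Stack.back();
          Stack.pop_back();
          if (!V(N))
            return;
          for (TrieNode *Callee : N->Callees)
            Stack.push_back(Callee);
        }
      }
    }
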
diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc
index 2c5b82959..3a0c0b90e 100644
--- a/lib/xray/xray_profiling.cc
+++ b/lib/xray/xray_profiling.cc
@@ -56,8 +56,9 @@ struct alignas(64) ProfilingData {
static pthread_key_t ProfilingKey;
-ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
- thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage;
+thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};
+
+static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
if (pthread_getspecific(ProfilingKey) == NULL) {
new (&ThreadStorage) ProfilingData{};
pthread_setspecific(ProfilingKey, &ThreadStorage);
@@ -86,6 +87,18 @@ ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
return TLD;
}
+static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
+ auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
+ if (TLD.Allocators != nullptr && TLD.FCT != nullptr) {
+ TLD.FCT->~FunctionCallTrie();
+ TLD.Allocators->~Allocators();
+ InternalFree(TLD.FCT);
+ InternalFree(TLD.Allocators);
+ TLD.FCT = nullptr;
+ TLD.Allocators = nullptr;
+ }
+}
+
} // namespace
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
@@ -148,6 +161,9 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
profileCollectorService::reset();
+ // Flush the current thread's local data structures as well.
+ cleanupTLD();
+
atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);
@@ -158,17 +174,12 @@ namespace {
thread_local atomic_uint8_t ReentranceGuard{0};
-void postCurrentThreadFCT(ProfilingData &TLD) {
+static void postCurrentThreadFCT(ProfilingData &TLD) {
if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
return;
profileCollectorService::post(*TLD.FCT, GetTid());
- TLD.FCT->~FunctionCallTrie();
- TLD.Allocators->~Allocators();
- InternalFree(TLD.FCT);
- InternalFree(TLD.Allocators);
- TLD.FCT = nullptr;
- TLD.Allocators = nullptr;
+ cleanupTLD();
}
} // namespace
@@ -294,10 +305,14 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
// ABI functions for registering exit handlers.
Atexit(+[] {
// Finalize and flush.
- if (profilingFinalize() != XRAY_LOG_FINALIZED)
+ if (profilingFinalize() != XRAY_LOG_FINALIZED) {
+ cleanupTLD();
return;
- if (profilingFlush() != XRAY_LOG_FLUSHED)
+ }
+ if (profilingFlush() != XRAY_LOG_FLUSHED) {
+ cleanupTLD();
return;
+ }
if (Verbosity())
Report("XRay Profile flushed at exit.");
});
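The thread-local ProfilingData storage moves to file scope so that cleanupTLD() can destroy the
FunctionCallTrie and its Allocators from any exit path (flush, post, or the atexit handler),
with the pointers nulled so repeated calls are harmless. A simplified standalone sketch of that
shape (names are illustrative, not the XRay ones):

    #include <new>
    #include <type_traits>

    struct PerThreadState { int Counter = 0; };

    static thread_local
        std::aligned_storage<sizeof(PerThreadState), alignof(PerThreadState)>::type Storage;
    static thread_local bool Initialized = false;

    static PerThreadState &getState() {
      if (!Initialized) {
        new (&Storage) PerThreadState{};  // lazily construct in place
        Initialized = true;
      }
      return *reinterpret_cast<PerThreadState *>(&Storage);
    }

    static void cleanupState() {
      if (!Initialized)
        return;                           // safe to call from several exit paths
      reinterpret_cast<PerThreadState *>(&Storage)->~PerThreadState();
      Initialized = false;
    }
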
diff --git a/lib/xray/xray_profiling_flags.inc b/lib/xray/xray_profiling_flags.inc
index 04ccd459d..e9230ae64 100644
--- a/lib/xray/xray_profiling_flags.inc
+++ b/lib/xray/xray_profiling_flags.inc
@@ -18,7 +18,7 @@ XRAY_FLAG(uptr, per_thread_allocator_max, 2 << 20,
"Maximum size of any single per-thread allocator.")
XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
"Maximum size of the global allocator for profile storage.")
-XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
+XRAY_FLAG(uptr, stack_allocator_max, 2 << 20,
"Maximum size of the traversal stack allocator.")
XRAY_FLAG(int, grace_period_ms, 1,
"Profile collection will wait this much time in milliseconds before "
diff --git a/lib/xray/xray_segmented_array.h b/lib/xray/xray_segmented_array.h
index ddfdfb3c7..f001b230c 100644
--- a/lib/xray/xray_segmented_array.h
+++ b/lib/xray/xray_segmented_array.h
@@ -24,6 +24,13 @@
#include <utility>
namespace __xray {
+struct Chunk {
+ void *Data;
+ Chunk *Prev;
+ Chunk *Next;
+};
+
+using ChunkAllocator = Allocator<next_pow2(sizeof(Chunk)) * 8>;
/// The Array type provides an interface similar to std::vector<...> but does
/// not shrink in size. Once constructed, elements can be appended but cannot be
@@ -51,16 +58,10 @@ private:
// TODO: Consider co-locating the chunk information with the data in the
// Block, as in an intrusive list -- i.e. putting the next and previous
// pointer values inside the Block storage.
- struct Chunk {
- typename AllocatorType::Block Block;
- static constexpr size_t Size = N;
- Chunk *Prev = this;
- Chunk *Next = this;
- };
-
static Chunk SentinelChunk;
AllocatorType *Alloc;
+ ChunkAllocator *ChunkAlloc;
Chunk *Head = &SentinelChunk;
Chunk *Tail = &SentinelChunk;
size_t Size = 0;
@@ -82,30 +83,19 @@ private:
return FreeChunk;
}
- auto Block = Alloc->Allocate(ElementStorageSize);
+ auto Block = Alloc->Allocate();
if (Block.Data == nullptr)
return nullptr;
- // TODO: Maybe use a separate managed allocator for Chunk instances?
- auto C = reinterpret_cast<Chunk *>(
- InternalAlloc(sizeof(Chunk), nullptr, kCacheLineSize));
- if (C == nullptr)
+ auto ChunkElement = ChunkAlloc->Allocate();
+ if (ChunkElement.Data == nullptr)
return nullptr;
- C->Block = Block;
- C->Prev = &SentinelChunk;
- C->Next = &SentinelChunk;
- return C;
- }
- static AllocatorType &GetGlobalAllocator() {
- static AllocatorType *const GlobalAllocator = [] {
- AllocatorType *A = reinterpret_cast<AllocatorType *>(
- InternalAlloc(sizeof(AllocatorType)));
- new (A) AllocatorType(2 << 10, 0);
- return A;
- }();
-
- return *GlobalAllocator;
+ // Placement-new the Chunk element at the appropriate location.
+ auto C = reinterpret_cast<Chunk *>(ChunkElement.Data);
+ Chunk LocalC{Block.Data, &SentinelChunk, &SentinelChunk};
+ internal_memcpy(C, &LocalC, sizeof(LocalC));
+ return C;
}
Chunk *InitHeadAndTail() {
@@ -201,21 +191,21 @@ private:
U &operator*() const {
DCHECK_NE(C, &SentinelChunk);
auto RelOff = Offset % N;
- return *reinterpret_cast<U *>(reinterpret_cast<char *>(C->Block.Data) +
+ return *reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) +
(RelOff * ElementStorageSize));
}
U *operator->() const {
DCHECK_NE(C, &SentinelChunk);
auto RelOff = Offset % N;
- return reinterpret_cast<U *>(reinterpret_cast<char *>(C->Block.Data) +
+ return reinterpret_cast<U *>(reinterpret_cast<char *>(C->Data) +
(RelOff * ElementStorageSize));
}
};
public:
- explicit Array(AllocatorType &A) : Alloc(&A) {}
- Array() : Array(GetGlobalAllocator()) {}
+ explicit Array(AllocatorType &A, ChunkAllocator &CA)
+ : Alloc(&A), ChunkAlloc(&CA) {}
Array(const Array &) = delete;
Array(Array &&O) NOEXCEPT : Alloc(O.Alloc),
@@ -246,9 +236,8 @@ public:
if (AppendNewChunk() == nullptr)
return nullptr;
- auto Position =
- reinterpret_cast<T *>(reinterpret_cast<char *>(Tail->Block.Data) +
- (Offset * ElementStorageSize));
+ auto Position = reinterpret_cast<T *>(reinterpret_cast<char *>(Tail->Data) +
+ (Offset * ElementStorageSize));
*Position = E;
++Size;
return Position;
@@ -268,7 +257,7 @@ public:
}
DCHECK_NE(Tail, &SentinelChunk);
- auto Position = reinterpret_cast<char *>(LatestChunk->Block.Data) +
+ auto Position = reinterpret_cast<char *>(LatestChunk->Data) +
(Offset * ElementStorageSize);
DCHECK_EQ(reinterpret_cast<uintptr_t>(Position) % ElementStorageSize, 0);
// In-place construct at Position.
@@ -286,7 +275,7 @@ public:
Offset -= N;
DCHECK_NE(C, &SentinelChunk);
}
- return *reinterpret_cast<T *>(reinterpret_cast<char *>(C->Block.Data) +
+ return *reinterpret_cast<T *>(reinterpret_cast<char *>(C->Data) +
(Offset * ElementStorageSize));
}
@@ -360,7 +349,8 @@ public:
// ensure that storage for the SentinelChunk is defined and has a single
// address.
template <class T, size_t N>
-typename Array<T, N>::Chunk Array<T, N>::SentinelChunk;
+Chunk Array<T, N>::SentinelChunk{nullptr, &Array<T, N>::SentinelChunk,
+ &Array<T, N>::SentinelChunk};
} // namespace __xray
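Chunk is now a plain trivially-copyable header ({Data, Prev, Next}) carved out of a dedicated
ChunkAllocator instead of InternalAlloc, and AppendNewChunk() writes it into the raw block with
internal_memcpy. On a typical LP64 target (illustrative, not normative), sizeof(Chunk) is 24
bytes, next_pow2(24) is 32, so ChunkAllocator is Allocator<256> and every chunk header occupies
one 256-byte, cache-line-aligned block. A standalone sketch of placing such a header into raw
storage:

    #include <cstring>

    struct Header {        // stands in for __xray::Chunk
      void *Data;
      Header *Prev;
      Header *Next;
    };

    Header *placeHeader(void *RawStorage, void *BlockData, Header *Sentinel) {
      Header Local{BlockData, Sentinel, Sentinel};
      auto *H = static_cast<Header *>(RawStorage);
      std::memcpy(H, &Local, sizeof(Local));  // legal: Header is trivially copyable
      return H;
    }
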
diff --git a/lib/xray/xray_utils.h b/lib/xray/xray_utils.h
index ce64528f9..eafa16e1a 100644
--- a/lib/xray/xray_utils.h
+++ b/lib/xray/xray_utils.h
@@ -15,6 +15,8 @@
#ifndef XRAY_UTILS_H
#define XRAY_UTILS_H
+#include <cstddef>
+#include <cstdint>
#include <sys/types.h>
#include <utility>
@@ -54,6 +56,14 @@ constexpr size_t next_pow2(size_t number) {
return next_pow2_helper(number, 1);
}
+template <class T> constexpr T &max(T &A, T &B) { return A > B ? A : B; }
+
+template <class T> constexpr T &min(T &A, T &B) { return A <= B ? A : B; }
+
+constexpr ptrdiff_t diff(uintptr_t A, uintptr_t B) {
+ return max(A, B) - min(A, B);
+}
+
} // namespace __xray
#endif // XRAY_UTILS_H
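
The new diff() helper returns the absolute distance between two addresses as a ptrdiff_t;
Alloc() in xray_allocator.h uses it to check that cache-line alignment did not push the bump
pointer past the mapped region. A small illustrative use, assuming nearest_boundary() (defined
earlier in this header) rounds up to the next multiple and that the cache line is 64 bytes:

    #include "xray_utils.h"

    void alignmentExample() {
      using namespace __xray;
      uintptr_t Base    = 0x7f0000001000;                 // start of a mapped region
      uintptr_t Aligned = nearest_boundary(Base + 3, 64); // rounds up to 0x7f0000001040
      ptrdiff_t Gap     = diff(Aligned, Base);            // 64; argument order is irrelevant
      (void)Gap;
    }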