summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2018-07-13 04:04:18 +0000
committerDean Michael Berris <dberris@google.com>2018-07-13 04:04:18 +0000
commite7ab4a270ce5bbeb83b76a08cbb78595dc51ce7d (patch)
tree75ef7312a40591100c8694ebc0cd886d9a2b2ce4 /lib
parentce37cc940dd3c634978038ae81f748289863dd32 (diff)
[XRay][compiler-rt] Profiling Mode: Flush logs on exit
Summary: This change adds support for writing out profiles at program exit. Depends on D48653. Reviewers: kpw, eizan Reviewed By: kpw Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D48956 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@336969 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/xray/xray_profile_collector.cc81
-rw-r--r--lib/xray/xray_profiling.cc30
-rw-r--r--lib/xray/xray_profiling_flags.inc2
3 files changed, 79 insertions, 34 deletions
diff --git a/lib/xray/xray_profile_collector.cc b/lib/xray/xray_profile_collector.cc
index 5da8073f5..972649193 100644
--- a/lib/xray/xray_profile_collector.cc
+++ b/lib/xray/xray_profile_collector.cc
@@ -30,13 +30,11 @@ struct ThreadTrie {
tid_t TId;
FunctionCallTrie *Trie;
};
-Vector<ThreadTrie> ThreadTries;
struct ProfileBuffer {
void *Data;
size_t Size;
};
-Vector<ProfileBuffer> ProfileBuffers;
struct BlockHeader {
u32 BlockSize;
@@ -44,6 +42,10 @@ struct BlockHeader {
u64 ThreadId;
};
+// These need to be pointers that point to heap/internal-allocator-allocated
+// objects because these are accessed even at program exit.
+Vector<ThreadTrie> *ThreadTries = nullptr;
+Vector<ProfileBuffer> *ProfileBuffers = nullptr;
FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
} // namespace
@@ -57,8 +59,16 @@ void post(const FunctionCallTrie &T, tid_t TId) {
new (GlobalAllocators) FunctionCallTrie::Allocators();
*GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
profilingFlags()->global_allocator_max);
+ ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
+ InternalAlloc(sizeof(Vector<ThreadTrie>)));
+ new (ThreadTries) Vector<ThreadTrie>();
+ ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
+ InternalAlloc(sizeof(Vector<ProfileBuffer>)));
+ new (ProfileBuffers) Vector<ProfileBuffer>();
});
DCHECK_NE(GlobalAllocators, nullptr);
+ DCHECK_NE(ThreadTries, nullptr);
+ DCHECK_NE(ProfileBuffers, nullptr);
ThreadTrie *Item = nullptr;
{
@@ -66,7 +76,7 @@ void post(const FunctionCallTrie &T, tid_t TId) {
if (GlobalAllocators == nullptr)
return;
- Item = ThreadTries.PushBack();
+ Item = ThreadTries->PushBack();
Item->TId = TId;
// Here we're using the internal allocator instead of the managed allocator
@@ -188,15 +198,15 @@ void serialize() {
SpinMutexLock Lock(&GlobalMutex);
// Clear out the global ProfileBuffers.
- for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
- InternalFree(ProfileBuffers[I].Data);
- ProfileBuffers.Reset();
+ for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
+ InternalFree((*ProfileBuffers)[I].Data);
+ ProfileBuffers->Reset();
- if (ThreadTries.Size() == 0)
+ if (ThreadTries->Size() == 0)
return;
// Then repopulate the global ProfileBuffers.
- for (u32 I = 0; I < ThreadTries.Size(); ++I) {
+ for (u32 I = 0; I < ThreadTries->Size(); ++I) {
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
ProfileRecord::PathAllocator PathAlloc(
@@ -207,7 +217,7 @@ void serialize() {
// use a local allocator and an __xray::Array<...> to store the intermediary
// data, then compute the size as we're going along. Then we'll allocate the
// contiguous space to contain the thread buffer data.
- const auto &Trie = *ThreadTries[I].Trie;
+ const auto &Trie = *(*ThreadTries)[I].Trie;
if (Trie.getRoots().empty())
continue;
populateRecords(ProfileRecords, PathAlloc, Trie);
@@ -227,8 +237,8 @@ void serialize() {
for (const auto &Record : ProfileRecords)
CumulativeSizes += 20 + (4 * Record.Path->size());
- BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId};
- auto Buffer = ProfileBuffers.PushBack();
+ BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId};
+ auto Buffer = ProfileBuffers->PushBack();
Buffer->Size = sizeof(Header) + CumulativeSizes;
Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
DCHECK_NE(Buffer->Data, nullptr);
@@ -244,18 +254,26 @@ void serialize() {
void reset() {
SpinMutexLock Lock(&GlobalMutex);
- // Clear out the profile buffers that have been serialized.
- for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
- InternalFree(ProfileBuffers[I].Data);
- ProfileBuffers.Reset();
-
- // Clear out the function call tries per thread.
- for (uptr I = 0; I < ThreadTries.Size(); ++I) {
- auto &T = ThreadTries[I];
- T.Trie->~FunctionCallTrie();
- InternalFree(T.Trie);
+ if (ProfileBuffers != nullptr) {
+ // Clear out the profile buffers that have been serialized.
+ for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
+ InternalFree((*ProfileBuffers)[I].Data);
+ ProfileBuffers->Reset();
+ InternalFree(ProfileBuffers);
+ ProfileBuffers = nullptr;
+ }
+
+ if (ThreadTries != nullptr) {
+ // Clear out the function call tries per thread.
+ for (uptr I = 0; I < ThreadTries->Size(); ++I) {
+ auto &T = (*ThreadTries)[I];
+ T.Trie->~FunctionCallTrie();
+ InternalFree(T.Trie);
+ }
+ ThreadTries->Reset();
+ InternalFree(ThreadTries);
+ ThreadTries = nullptr;
}
- ThreadTries.Reset();
// Reset the global allocators.
if (GlobalAllocators != nullptr) {
@@ -267,18 +285,29 @@ void reset() {
InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
new (GlobalAllocators) FunctionCallTrie::Allocators();
*GlobalAllocators = FunctionCallTrie::InitAllocators();
+ ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
+ InternalAlloc(sizeof(Vector<ThreadTrie>)));
+ new (ThreadTries) Vector<ThreadTrie>();
+ ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
+ InternalAlloc(sizeof(Vector<ProfileBuffer>)));
+ new (ProfileBuffers) Vector<ProfileBuffer>();
}
XRayBuffer nextBuffer(XRayBuffer B) {
SpinMutexLock Lock(&GlobalMutex);
- if (B.Data == nullptr && ProfileBuffers.Size())
- return {ProfileBuffers[0].Data, ProfileBuffers[0].Size};
+
+ if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0)
+ return {nullptr, 0};
+
+ if (B.Data == nullptr)
+ return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
BlockHeader Header;
internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
auto NextBlock = Header.BlockNum + 1;
- if (NextBlock < ProfileBuffers.Size())
- return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size};
+ if (NextBlock < ProfileBuffers->Size())
+ return {(*ProfileBuffers)[NextBlock].Data,
+ (*ProfileBuffers)[NextBlock].Size};
return {nullptr, 0};
}
diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc
index fa60263c2..2c5b82959 100644
--- a/lib/xray/xray_profiling.cc
+++ b/lib/xray/xray_profiling.cc
@@ -277,7 +277,7 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
// We need to reset the profile data collection implementation now.
profileCollectorService::reset();
- // We need to set up the at-thread-exit handler.
+ // We need to set up the exit handlers.
static pthread_once_t Once = PTHREAD_ONCE_INIT;
pthread_once(&Once, +[] {
pthread_key_create(&ProfilingKey, +[](void *P) {
@@ -288,6 +288,19 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
postCurrentThreadFCT(TLD);
});
+
+ // We also need to set up an exit handler, so that we can get the profile
+ // information at exit time. We use the C API to do this, to not rely on C++
+ // ABI functions for registering exit handlers.
+ Atexit(+[] {
+ // Finalize and flush.
+ if (profilingFinalize() != XRAY_LOG_FINALIZED)
+ return;
+ if (profilingFlush() != XRAY_LOG_FLUSHED)
+ return;
+ if (Verbosity())
+ Report("XRay Profile flushed at exit.");
+ });
});
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
@@ -321,13 +334,16 @@ bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
profilingFlush,
};
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
- if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
- Verbosity())
- Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
- "%d\n",
- RegistrationResult);
+ if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+ if (Verbosity())
+ Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
+ "%d\n",
+ RegistrationResult);
+ return false;
+ }
+
if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
- __xray_set_log_impl(Impl);
+    __xray_log_select_mode("xray-profiling");
return true;
}
diff --git a/lib/xray/xray_profiling_flags.inc b/lib/xray/xray_profiling_flags.inc
index 616bc83ad..04ccd459d 100644
--- a/lib/xray/xray_profiling_flags.inc
+++ b/lib/xray/xray_profiling_flags.inc
@@ -20,7 +20,7 @@ XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
"Maximum size of the global allocator for profile storage.")
XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
"Maximum size of the traversal stack allocator.")
-XRAY_FLAG(int, grace_period_ms, 100,
+XRAY_FLAG(int, grace_period_ms, 1,
"Profile collection will wait this much time in milliseconds before "
"resetting the global state. This gives a chance to threads to "
"notice that the profiler has been finalized and clean up.")