diff options
author | Dean Michael Berris <dberris@google.com> | 2018-07-13 04:04:18 +0000 |
---|---|---|
committer | Dean Michael Berris <dberris@google.com> | 2018-07-13 04:04:18 +0000 |
commit | e7ab4a270ce5bbeb83b76a08cbb78595dc51ce7d (patch) | |
tree | 75ef7312a40591100c8694ebc0cd886d9a2b2ce4 /lib | |
parent | ce37cc940dd3c634978038ae81f748289863dd32 (diff) |
[XRay][compiler-rt] Profiling Mode: Flush logs on exit
Summary:
This change adds support for writing out profiles at program exit.
Depends on D48653.
Reviewers: kpw, eizan
Reviewed By: kpw
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D48956
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@336969 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/xray/xray_profile_collector.cc | 81 | ||||
-rw-r--r-- | lib/xray/xray_profiling.cc | 30 | ||||
-rw-r--r-- | lib/xray/xray_profiling_flags.inc | 2 |
3 files changed, 79 insertions, 34 deletions
diff --git a/lib/xray/xray_profile_collector.cc b/lib/xray/xray_profile_collector.cc
index 5da8073f5..972649193 100644
--- a/lib/xray/xray_profile_collector.cc
+++ b/lib/xray/xray_profile_collector.cc
@@ -30,13 +30,11 @@ struct ThreadTrie {
   tid_t TId;
   FunctionCallTrie *Trie;
 };
-Vector<ThreadTrie> ThreadTries;
 
 struct ProfileBuffer {
   void *Data;
   size_t Size;
 };
-Vector<ProfileBuffer> ProfileBuffers;
 
 struct BlockHeader {
   u32 BlockSize;
@@ -44,6 +42,10 @@ struct BlockHeader {
   u64 ThreadId;
 };
 
+// These need to be pointers that point to heap/internal-allocator-allocated
+// objects because these are accessed even at program exit.
+Vector<ThreadTrie> *ThreadTries = nullptr;
+Vector<ProfileBuffer> *ProfileBuffers = nullptr;
 FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
 
 } // namespace
@@ -57,8 +59,16 @@ void post(const FunctionCallTrie &T, tid_t TId) {
     new (GlobalAllocators) FunctionCallTrie::Allocators();
     *GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
         profilingFlags()->global_allocator_max);
+    ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
+        InternalAlloc(sizeof(Vector<ThreadTrie>)));
+    new (ThreadTries) Vector<ThreadTrie>();
+    ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
+        InternalAlloc(sizeof(Vector<ProfileBuffer>)));
+    new (ProfileBuffers) Vector<ProfileBuffer>();
   });
   DCHECK_NE(GlobalAllocators, nullptr);
+  DCHECK_NE(ThreadTries, nullptr);
+  DCHECK_NE(ProfileBuffers, nullptr);
 
   ThreadTrie *Item = nullptr;
   {
@@ -66,7 +76,7 @@ void post(const FunctionCallTrie &T, tid_t TId) {
     if (GlobalAllocators == nullptr)
       return;
 
-    Item = ThreadTries.PushBack();
+    Item = ThreadTries->PushBack();
     Item->TId = TId;
 
     // Here we're using the internal allocator instead of the managed allocator
@@ -188,15 +198,15 @@ void serialize() {
   SpinMutexLock Lock(&GlobalMutex);
 
   // Clear out the global ProfileBuffers.
-  for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
-    InternalFree(ProfileBuffers[I].Data);
-  ProfileBuffers.Reset();
+  for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
+    InternalFree((*ProfileBuffers)[I].Data);
+  ProfileBuffers->Reset();
 
-  if (ThreadTries.Size() == 0)
+  if (ThreadTries->Size() == 0)
     return;
 
   // Then repopulate the global ProfileBuffers.
-  for (u32 I = 0; I < ThreadTries.Size(); ++I) {
+  for (u32 I = 0; I < ThreadTries->Size(); ++I) {
     using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
     ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
     ProfileRecord::PathAllocator PathAlloc(
@@ -207,7 +217,7 @@ void serialize() {
     // use a local allocator and an __xray::Array<...> to store the intermediary
     // data, then compute the size as we're going along. Then we'll allocate the
     // contiguous space to contain the thread buffer data.
-    const auto &Trie = *ThreadTries[I].Trie;
+    const auto &Trie = *(*ThreadTries)[I].Trie;
     if (Trie.getRoots().empty())
       continue;
     populateRecords(ProfileRecords, PathAlloc, Trie);
@@ -227,8 +237,8 @@ void serialize() {
     for (const auto &Record : ProfileRecords)
       CumulativeSizes += 20 + (4 * Record.Path->size());
 
-    BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId};
-    auto Buffer = ProfileBuffers.PushBack();
+    BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId};
+    auto Buffer = ProfileBuffers->PushBack();
     Buffer->Size = sizeof(Header) + CumulativeSizes;
     Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
     DCHECK_NE(Buffer->Data, nullptr);
@@ -244,18 +254,26 @@ void serialize() {
 
 void reset() {
   SpinMutexLock Lock(&GlobalMutex);
-  // Clear out the profile buffers that have been serialized.
-  for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
-    InternalFree(ProfileBuffers[I].Data);
-  ProfileBuffers.Reset();
-
-  // Clear out the function call tries per thread.
-  for (uptr I = 0; I < ThreadTries.Size(); ++I) {
-    auto &T = ThreadTries[I];
-    T.Trie->~FunctionCallTrie();
-    InternalFree(T.Trie);
+  if (ProfileBuffers != nullptr) {
+    // Clear out the profile buffers that have been serialized.
+    for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
+      InternalFree((*ProfileBuffers)[I].Data);
+    ProfileBuffers->Reset();
+    InternalFree(ProfileBuffers);
+    ProfileBuffers = nullptr;
+  }
+
+  if (ThreadTries != nullptr) {
+    // Clear out the function call tries per thread.
+    for (uptr I = 0; I < ThreadTries->Size(); ++I) {
+      auto &T = (*ThreadTries)[I];
+      T.Trie->~FunctionCallTrie();
+      InternalFree(T.Trie);
+    }
+    ThreadTries->Reset();
+    InternalFree(ThreadTries);
+    ThreadTries = nullptr;
   }
-  ThreadTries.Reset();
 
   // Reset the global allocators.
   if (GlobalAllocators != nullptr) {
@@ -267,18 +285,29 @@ void reset() {
       InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
   new (GlobalAllocators) FunctionCallTrie::Allocators();
   *GlobalAllocators = FunctionCallTrie::InitAllocators();
+  ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
+      InternalAlloc(sizeof(Vector<ThreadTrie>)));
+  new (ThreadTries) Vector<ThreadTrie>();
+  ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
+      InternalAlloc(sizeof(Vector<ProfileBuffer>)));
+  new (ProfileBuffers) Vector<ProfileBuffer>();
 }
 
 XRayBuffer nextBuffer(XRayBuffer B) {
   SpinMutexLock Lock(&GlobalMutex);
-  if (B.Data == nullptr && ProfileBuffers.Size())
-    return {ProfileBuffers[0].Data, ProfileBuffers[0].Size};
+
+  if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0)
+    return {nullptr, 0};
+
+  if (B.Data == nullptr)
+    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
 
   BlockHeader Header;
   internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
   auto NextBlock = Header.BlockNum + 1;
-  if (NextBlock < ProfileBuffers.Size())
-    return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size};
+  if (NextBlock < ProfileBuffers->Size())
+    return {(*ProfileBuffers)[NextBlock].Data,
+            (*ProfileBuffers)[NextBlock].Size};
   return {nullptr, 0};
 }
 
diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc
index fa60263c2..2c5b82959 100644
--- a/lib/xray/xray_profiling.cc
+++ b/lib/xray/xray_profiling.cc
@@ -277,7 +277,7 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
   // We need to reset the profile data collection implementation now.
   profileCollectorService::reset();
 
-  // We need to set up the at-thread-exit handler.
+  // We need to set up the exit handlers.
   static pthread_once_t Once = PTHREAD_ONCE_INIT;
   pthread_once(&Once, +[] {
     pthread_key_create(&ProfilingKey, +[](void *P) {
@@ -288,6 +288,19 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
 
       postCurrentThreadFCT(TLD);
     });
+
+    // We also need to set up an exit handler, so that we can get the profile
+    // information at exit time. We use the C API to do this, to not rely on C++
+    // ABI functions for registering exit handlers.
+    Atexit(+[] {
+      // Finalize and flush.
+      if (profilingFinalize() != XRAY_LOG_FINALIZED)
+        return;
+      if (profilingFlush() != XRAY_LOG_FLUSHED)
+        return;
+      if (Verbosity())
+        Report("XRay Profile flushed at exit.");
+    });
   });
 
   __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
@@ -321,13 +334,16 @@ bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
       profilingFlush,
   };
   auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
-  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
-      Verbosity())
-    Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
-           "%d\n",
-           RegistrationResult);
+  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+    if (Verbosity())
+      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
+             "%d\n",
+             RegistrationResult);
+    return false;
+  }
+
   if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
-    __xray_set_log_impl(Impl);
+    __xray_log_select_mode("xray_profiling");
   return true;
 }
 
diff --git a/lib/xray/xray_profiling_flags.inc b/lib/xray/xray_profiling_flags.inc
index 616bc83ad..04ccd459d 100644
--- a/lib/xray/xray_profiling_flags.inc
+++ b/lib/xray/xray_profiling_flags.inc
@@ -20,7 +20,7 @@ XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
           "Maximum size of the global allocator for profile storage.")
 XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
           "Maximum size of the traversal stack allocator.")
-XRAY_FLAG(int, grace_period_ms, 100,
+XRAY_FLAG(int, grace_period_ms, 1,
           "Profile collection will wait this much time in milliseconds before "
           "resetting the global state. This gives a chance to threads to "
           "notice that the profiler has been finalized and clean up.")