summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2018-07-18 01:31:30 +0000
committerDean Michael Berris <dberris@google.com>2018-07-18 01:31:30 +0000
commitf8e9626b112c22014212116b8d53dcc0d2c657d1 (patch)
tree03e24b968e3ba9a0f0f8be66d21b591a431bed1a
parent9d27f84cb4ddc9a9401507f60cb3ac09a37231a4 (diff)
[XRay][compiler-rt] FDR Mode: Allow multiple runs
Summary: Fix a bug in FDR mode which didn't allow for re-initialising the logging in the same process. This change ensures that: - When we flush the FDR mode logging, that the state of the logging implementation is `XRAY_LOG_UNINITIALIZED`. - Fix up the thread-local initialisation to use aligned storage and `pthread_getspecific` as well as `pthread_setspecific` for the thread-specific data. - Actually use the pointer provided to the thread-exit cleanup handling, instead of assuming that the thread has thread-local data associated with it, and reaching at thread-exit time. In this change we also have an explicit test for two consecutive sessions for FDR mode tracing, and ensuring both sessions succeed. Reviewers: kpw, eizan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D49359 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@337341 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/xray/xray_fdr_logging.cc83
-rw-r--r--test/xray/TestCases/Posix/fdr-mode-multiple.cc76
2 files changed, 126 insertions, 33 deletions
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index 3565f2571..bb8abbe42 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -15,11 +15,11 @@
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
-#include <pthread.h>
#include <cassert>
#include <errno.h>
#include <limits>
#include <memory>
+#include <pthread.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <time.h>
@@ -80,6 +80,16 @@ static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
// Use a global pthread key to identify thread-local data for logging.
static pthread_key_t Key;
+// Global BufferQueue.
+static BufferQueue *BQ = nullptr;
+
+static atomic_sint32_t LogFlushStatus = {
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+static FDRLoggingOptions FDROptions;
+
+static SpinMutex FDROptionsMutex;
+
// This function will initialize the thread-local data structure used by the FDR
// logging implementation and return a reference to it. The implementation
// details require a bit of care to maintain.
@@ -114,15 +124,18 @@ static pthread_key_t Key;
// critical section, calling a function that might be XRay instrumented (and
// thus in turn calling into malloc by virtue of registration of the
// thread_local's destructor).
+static_assert(alignof(ThreadLocalData) >= 64,
+ "ThreadLocalData must be cache line aligned.");
static ThreadLocalData &getThreadLocalData() {
- static_assert(alignof(ThreadLocalData) >= 64,
- "ThreadLocalData must be cache line aligned.");
- thread_local ThreadLocalData TLD;
- thread_local bool UNUSED ThreadOnce = [] {
- pthread_setspecific(Key, &TLD);
- return false;
- }();
- return TLD;
+ thread_local typename std::aligned_storage<
+ sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
+
+ if (pthread_getspecific(Key) == NULL) {
+ new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
+ pthread_setspecific(Key, &TLDStorage);
+ }
+
+ return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
}
static void writeNewBufferPreamble(tid_t Tid, timespec TS,
@@ -416,7 +429,7 @@ static bool prepareBuffer(uint64_t TSC, unsigned char CPU,
return false;
auto EC = TLD.BQ->getBuffer(TLD.Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
- Report("Failed to acquire a buffer; error=%s\n",
+ Report("Failed to prepare a buffer; error = '%s'\n",
BufferQueue::getErrorString(EC));
return false;
}
@@ -462,7 +475,7 @@ isLogInitializedAndReady(BufferQueue *LBQ, uint64_t TSC, unsigned char CPU,
auto LS = atomic_load(&LoggingStatus, memory_order_acquire);
if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
- Report("Failed to acquire a buffer; error=%s\n",
+ Report("Failed to acquire a buffer; error = '%s'\n",
BufferQueue::getErrorString(EC));
return false;
}
@@ -541,8 +554,8 @@ thread_local atomic_uint8_t Running{0};
static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
uint64_t TSC, unsigned char CPU, uint64_t Arg1,
int (*wall_clock_reader)(clockid_t,
- struct timespec *),
- BufferQueue *BQ) XRAY_NEVER_INSTRUMENT {
+ struct timespec *))
+ XRAY_NEVER_INSTRUMENT {
__asm volatile("# LLVM-MCA-BEGIN processFunctionHook");
// Prevent signal handler recursion, so in case we're already in a log writing
// mode and the signal handler comes in (and is also instrumented) then we
@@ -557,8 +570,6 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
auto &TLD = getThreadLocalData();
- // In case the reference has been cleaned up before, we make sure we
- // initialize it to the provided BufferQueue.
if (TLD.BQ == nullptr)
TLD.BQ = BQ;
@@ -656,15 +667,6 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
__asm volatile("# LLVM-MCA-END");
}
-// Global BufferQueue.
-BufferQueue *BQ = nullptr;
-
-atomic_sint32_t LogFlushStatus = {XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-
-FDRLoggingOptions FDROptions;
-
-SpinMutex FDROptionsMutex;
-
static XRayFileHeader &fdrCommonHeaderInfo() {
static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
@@ -789,15 +791,28 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
// we decide to do anything further with the global buffer queue.
__xray_log_remove_buffer_iterator();
+ // Once flushed, we should set the global status of the logging implementation
+ // to "uninitialized" to allow for FDR-logging multiple runs.
+ auto ResetToUnitialized = at_scope_exit([] {
+ atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ });
+
+ auto CleanupBuffers = at_scope_exit([] {
+ if (BQ != nullptr) {
+ auto &TLD = getThreadLocalData();
+ if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
+ releaseThreadLocalBuffer(*TLD.BQ);
+ BQ->~BufferQueue();
+ InternalFree(BQ);
+ BQ = nullptr;
+ }
+ });
+
if (fdrFlags()->no_file_flush) {
if (Verbosity())
Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
- // Clean up the buffer queue, and do not bother writing out the files!
- BQ->~BufferQueue();
- InternalFree(BQ);
- BQ = nullptr;
-
atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -913,13 +928,13 @@ static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
void fdrLoggingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
auto TC = getTimestamp();
- processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime, BQ);
+ processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime);
}
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
auto TC = getTimestamp();
- processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, BQ);
+ processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime);
}
void fdrLoggingHandleCustomEvent(void *Event,
@@ -1121,8 +1136,10 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
probeRequiredCPUFeatures() ? getTSCFrequency()
: __xray::NanosecondsPerSecond,
memory_order_release);
- pthread_key_create(&Key, +[](void *) {
- auto &TLD = getThreadLocalData();
+ pthread_key_create(&Key, +[](void *TLDPtr) {
+ if (TLDPtr == nullptr)
+ return;
+ auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
if (TLD.BQ == nullptr)
return;
auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
diff --git a/test/xray/TestCases/Posix/fdr-mode-multiple.cc b/test/xray/TestCases/Posix/fdr-mode-multiple.cc
new file mode 100644
index 000000000..487e30313
--- /dev/null
+++ b/test/xray/TestCases/Posix/fdr-mode-multiple.cc
@@ -0,0 +1,76 @@
+// RUN: %clangxx_xray -g -std=c++11 %s -o %t -fxray-modes=xray-fdr
+// RUN: rm -f fdr-inmemory-test-*
+// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \
+// RUN: verbosity=1" \
+// RUN: XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \
+// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: FILES=`find %T -name 'fdr-inmemory-test-*' | wc -l`
+// RUN: [ $FILES -eq 0 ]
+// RUN: rm -f fdr-inmemory-test-*
+//
+// REQUIRES: x86_64-target-arch
+// REQUIRES: built-in-llvm-tree
+
+#include "xray/xray_log_interface.h"
+#include <cassert>
+#include <iostream>
+
+uint64_t var = 0;
+uint64_t buffers = 0;
+[[clang::xray_always_instrument]] void __attribute__((noinline)) f() { ++var; }
+
+int main(int argc, char *argv[]) {
+ assert(__xray_log_select_mode("xray-fdr") ==
+ XRayLogRegisterStatus::XRAY_REGISTRATION_OK);
+ auto status = __xray_log_init_mode(
+ "xray-fdr",
+ "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0");
+ assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+ __xray_patch();
+
+ // Create enough entries.
+ for (int i = 0; i != 1 << 20; ++i) {
+ f();
+ }
+
+ // Then we want to verify that we're getting 10 buffers outside of the initial
+ // header.
+ auto finalize_status = __xray_log_finalize();
+ assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED);
+ auto process_status =
+ __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; });
+ std::cout << "buffers = " << buffers << std::endl;
+ assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+ auto flush_status = __xray_log_flushLog();
+ assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+ // We expect 11 buffers because 1 header buffer + 10 actual FDR buffers.
+ // CHECK: Buffers = 11
+ std::cout << "Buffers = " << buffers << std::endl;
+
+ // In this test we ensure that we can restart the cycle after the flush.
+ status = __xray_log_init_mode(
+ "xray-fdr",
+ "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0");
+ assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+ __xray_patch();
+
+ // Create enough entries.
+ for (int i = 0; i != 1 << 20; ++i) {
+ f();
+ }
+
+ // Then we want to verify that we're getting 10 buffers outside of the initial
+ // header.
+ finalize_status = __xray_log_finalize();
+ assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED);
+ process_status =
+ __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; });
+ std::cout << "buffers = " << buffers << std::endl;
+ assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+ flush_status = __xray_log_flushLog();
+ assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+ // We expect 22 buffers because 1 header buffer + 10 actual FDR buffers, plus
+ // the number of buffers we got from the previous run (also 11).
+ // CHECK: Buffers = 22
+ std::cout << "Buffers = " << buffers << std::endl;
+}