summaryrefslogtreecommitdiff
path: root/lib/xray/xray_fdr_logging.cc
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2017-11-21 07:16:57 +0000
committerDean Michael Berris <dberris@google.com>2017-11-21 07:16:57 +0000
commit1bcf351370199c4c6b9dc24972469aa89ce75478 (patch)
tree72fb7e59edb9b2e8bda4ef6a3cfd5f85b1504fcf /lib/xray/xray_fdr_logging.cc
parent9ccd9976910686c34baf18493c0796458e826847 (diff)
[XRay] Use optimistic logging model for FDR mode
Summary: Before this change, the FDR mode implementation relied on at thread-exit handling to return buffers back to the (global) buffer queue. This introduces issues with the initialisation of the thread_local objects which, even through the use of pthread_setspecific(...) may eventually call into an allocation function. Similar to previous changes in this line, we're finding that there is a huge potential for deadlocks when initialising these thread-locals when the memory allocation implementation is also xray-instrumented. In this change, we limit the call to pthread_setspecific(...) to provide a non-null value to associate to the key created with pthread_key_create(...). While this doesn't completely eliminate the potential for the deadlock(s), it does allow us to still clean up at thread exit when we need to. The change is that we don't need to do more work when starting and ending a thread's lifetime. We also have a test to make sure that we actually can safely recycle the buffers in case we end up re-using the buffer(s) available from the queue on multiple thread entry/exits. This change cuts across both LLVM and compiler-rt to allow us to update both the XRay runtime implementation as well as the library support for loading these new versions of the FDR mode logging. Version 2 of the FDR logging implementation makes the following changes: * Introduction of a new 'BufferExtents' metadata record that's outside of the buffer's contents but are written before the actual buffer. This data is associated to the Buffer handed out by the BufferQueue rather than a record that occupies bytes in the actual buffer. * Removal of the "end of buffer" records. This is in-line with the changes we described above, to allow for optimistic logging without explicit record writing at thread exit. The optimistic logging model operates under the following assumptions: * Threads writing to the buffers will potentially race with the thread attempting to flush the log. To avoid this situation from occuring, we make sure that when we've finalized the logging implementation, that threads will see this finalization state on the next write, and either choose to not write records the thread would have written or write the record(s) in two phases -- first write the record(s), then update the extents metadata. * We change the buffer queue implementation so that once it's handed out a buffer to a thread, that we assume that buffer is marked "used" to be able to capture partial writes. None of this will be safe to handle if threads are racing to write the extents records and the reader thread is attempting to flush the log. The optimism comes from the finalization routine being required to complete before we attempt to flush the log. This is a fairly significant semantics change for the FDR implementation. This is why we've decided to update the version number for FDR mode logs. The tools, however, still need to be able to support older versions of the log until we finally deprecate those earlier versions. Reviewers: dblaikie, pelikan, kpw Subscribers: llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D39526 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@318733 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/xray/xray_fdr_logging.cc')
-rw-r--r--lib/xray/xray_fdr_logging.cc149
1 files changed, 104 insertions, 45 deletions
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index cf27acc24..035fc4061 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -15,9 +15,9 @@
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
+#include <errno.h>
#include <sys/syscall.h>
#include <sys/time.h>
-#include <errno.h>
#include <time.h>
#include <unistd.h>
@@ -35,10 +35,7 @@
namespace __xray {
// Global BufferQueue.
-// NOTE: This is a pointer to avoid having to do atomic operations at
-// initialization time. This is OK to leak as there will only be one bufferqueue
-// for the runtime, initialized once through the fdrInit(...) sequence.
-std::shared_ptr<BufferQueue> *BQ = nullptr;
+BufferQueue *BQ = nullptr;
__sanitizer::atomic_sint32_t LogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
@@ -51,19 +48,37 @@ __sanitizer::SpinMutex FDROptionsMutex;
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
if (__sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire) !=
- XRayLogInitStatus::XRAY_LOG_FINALIZED)
+ XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+ if (__sanitizer::Verbosity())
+ Report("Not flushing log, implementation is not finalized.\n");
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
if (!__sanitizer::atomic_compare_exchange_strong(
&LogFlushStatus, &Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+
+ if (__sanitizer::Verbosity())
+ Report("Not flushing log, implementation is still finalizing.\n");
return static_cast<XRayLogFlushStatus>(Result);
+ }
- // Make a copy of the BufferQueue pointer to prevent other threads that may be
- // resetting it from blowing away the queue prematurely while we're dealing
- // with it.
- auto LocalBQ = *BQ;
+ if (BQ == nullptr) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot flush when global buffer queue is null.\n");
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ // We wait a number of microseconds to allow threads to see that we've
+ // finalised before attempting to flush the log.
+ struct timespec TS;
+ TS.tv_sec = flags()->xray_fdr_log_grace_period_us / 1000000;
+ TS.tv_nsec = (flags()->xray_fdr_log_grace_period_us % 1000000) * 1000;
+ struct timespec Rem;
+ while (clock_nanosleep(CLOCK_REALTIME, 0, &TS, &Rem) &&
+ (Rem.tv_sec != 0 || Rem.tv_nsec != 0))
+ TS = Rem;
// We write out the file in the following format:
//
@@ -94,34 +109,44 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
XRayFileHeader Header;
- Header.Version = 1;
+
+ // Version 2 of the log writes the extents of the buffer, instead of relying
+ // on an end-of-buffer record.
+ Header.Version = 2;
Header.Type = FileTypes::FDR_LOG;
Header.CycleFrequency = CycleFrequency;
+
// FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
// before setting the values in the header.
Header.ConstantTSC = 1;
Header.NonstopTSC = 1;
- Header.FdrData = FdrAdditionalHeaderData{LocalBQ->ConfiguredBufferSize()};
+ Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
- LocalBQ->apply([&](const BufferQueue::Buffer &B) {
- uint64_t BufferSize = B.Size;
- if (BufferSize > 0) {
+ BQ->apply([&](const BufferQueue::Buffer &B) {
+ // Starting at version 2 of the FDR logging implementation, we only write
+ // the records identified by the extents of the buffer. We use the Extents
+ // from the Buffer and write that out as the first record in the buffer.
+ // We still use a Metadata record, but fill in the extents instead for the
+ // data.
+ MetadataRecord ExtentsRecord;
+ auto BufferExtents = __sanitizer::atomic_load(
+ &B.Extents->Size, __sanitizer::memory_order_acquire);
+ assert(BufferExtents <= B.Size);
+ ExtentsRecord.Type = uint8_t(RecordType::Metadata);
+ ExtentsRecord.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents);
+ std::memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
+ if (BufferExtents > 0) {
+ retryingWriteAll(Fd, reinterpret_cast<char *>(&ExtentsRecord),
+ reinterpret_cast<char *>(&ExtentsRecord) +
+ sizeof(MetadataRecord));
retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
- reinterpret_cast<char *>(B.Buffer) + B.Size);
+ reinterpret_cast<char *>(B.Buffer) + BufferExtents);
}
});
- // The buffer for this particular thread would have been finalised after
- // we've written everything to disk, and we'd lose the thread's trace.
- auto &TLD = __xray::__xray_fdr_internal::getThreadLocalData();
- if (TLD.Buffer.Buffer != nullptr) {
- __xray::__xray_fdr_internal::writeEOBMetadata();
- auto Start = reinterpret_cast<char *>(TLD.Buffer.Buffer);
- retryingWriteAll(Fd, Start, Start + TLD.Buffer.Size);
- }
-
__sanitizer::atomic_store(&LogFlushStatus,
XRayLogFlushStatus::XRAY_LOG_FLUSHED,
__sanitizer::memory_order_release);
@@ -133,12 +158,15 @@ XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
if (!__sanitizer::atomic_compare_exchange_strong(
&LoggingStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_FINALIZING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot finalize log, implementation not initialized.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
// Do special things to make the log finalize itself, and not allow any more
// operations to be performed until re-initialized.
- (*BQ)->finalize();
+ BQ->finalize();
__sanitizer::atomic_store(&LoggingStatus,
XRayLogInitStatus::XRAY_LOG_FINALIZED,
@@ -155,7 +183,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return static_cast<XRayLogInitStatus>(CurrentStatus);
// Release the in-memory buffer queue.
- BQ->reset();
+ delete BQ;
+ BQ = nullptr;
// Spin until the flushing status is flushed.
s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -173,8 +202,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
}
struct TSCAndCPU {
- uint64_t TSC;
- unsigned char CPU;
+ uint64_t TSC = 0;
+ unsigned char CPU = 0;
};
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
@@ -205,15 +234,15 @@ static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
void fdrLoggingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
auto TC = getTimestamp();
- __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC,
- TC.CPU, 0, clock_gettime, *BQ);
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0,
+ clock_gettime, BQ);
}
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
auto TC = getTimestamp();
- __xray_fdr_internal::processFunctionHook(
- FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ);
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg,
+ clock_gettime, BQ);
}
void fdrLoggingHandleCustomEvent(void *Event,
@@ -223,10 +252,8 @@ void fdrLoggingHandleCustomEvent(void *Event,
auto &TSC = TC.TSC;
auto &CPU = TC.CPU;
RecursionGuard Guard{Running};
- if (!Guard) {
- assert(Running && "RecursionGuard is buggy!");
+ if (!Guard)
return;
- }
if (EventSize > std::numeric_limits<int32_t>::max()) {
using Empty = struct {};
static Empty Once = [&] {
@@ -238,7 +265,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
}
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
auto &TLD = getThreadLocalData();
- if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, clock_gettime))
+ if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, clock_gettime))
return;
// Here we need to prepare the log to handle:
@@ -246,7 +273,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
// - The additional data we're going to write. Currently, that's the size of
// the event we're going to dump into the log as free-form bytes.
if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) {
- TLD.LocalBQ = nullptr;
+ TLD.BQ = nullptr;
return;
}
@@ -264,21 +291,29 @@ void fdrLoggingHandleCustomEvent(void *Event,
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
TLD.RecordPtr += sizeof(CustomEvent);
std::memcpy(TLD.RecordPtr, Event, ReducedEventSize);
+ incrementExtents(MetadataRecSize + EventSize);
endBufferIfFull();
}
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
- if (OptionsSize != sizeof(FDRLoggingOptions))
+ if (OptionsSize != sizeof(FDRLoggingOptions)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot initialize FDR logging; wrong size for options: %d\n",
+ OptionsSize);
return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
&LoggingStatus, __sanitizer::memory_order_acquire));
+ }
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!__sanitizer::atomic_compare_exchange_strong(
&LoggingStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot initialize already initialized implementation.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
{
__sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
@@ -286,15 +321,37 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
}
bool Success = false;
+
+ if (BQ != nullptr) {
+ delete BQ;
+ BQ = nullptr;
+ }
+
if (BQ == nullptr)
- BQ = new std::shared_ptr<BufferQueue>();
+ BQ = new BufferQueue(BufferSize, BufferMax, Success);
- *BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
if (!Success) {
Report("BufferQueue init failed.\n");
+ if (BQ != nullptr) {
+ delete BQ;
+ BQ = nullptr;
+ }
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
+ static bool UNUSED Once = [] {
+ pthread_key_create(&__xray_fdr_internal::Key, +[](void *) {
+ auto &TLD = __xray_fdr_internal::getThreadLocalData();
+ if (TLD.BQ == nullptr)
+ return;
+ auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok)
+ Report("At thread exit, failed to release buffer at %p; error=%s\n",
+ TLD.Buffer.Buffer, BufferQueue::getErrorString(EC));
+ });
+ return false;
+ }();
+
// Arg1 handler should go in first to avoid concurrent code accidentally
// falling back to arg0 when it should have ran arg1.
__xray_set_handler_arg1(fdrLoggingHandleArg1);
@@ -305,7 +362,9 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
__sanitizer::atomic_store(&LoggingStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZED,
__sanitizer::memory_order_release);
- Report("XRay FDR init successful.\n");
+
+ if (__sanitizer::Verbosity())
+ Report("XRay FDR init successful.\n");
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}