summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/xray/tests/unit/fdr_logging_test.cc105
-rw-r--r--lib/xray/xray_buffer_queue.cc34
-rw-r--r--lib/xray/xray_buffer_queue.h5
-rw-r--r--lib/xray/xray_fdr_log_records.h2
-rw-r--r--lib/xray/xray_fdr_logging.cc149
-rw-r--r--lib/xray/xray_fdr_logging_impl.h268
-rw-r--r--lib/xray/xray_flags.inc4
7 files changed, 321 insertions, 246 deletions
diff --git a/lib/xray/tests/unit/fdr_logging_test.cc b/lib/xray/tests/unit/fdr_logging_test.cc
index edc5e3c74..1009d56a4 100644
--- a/lib/xray/tests/unit/fdr_logging_test.cc
+++ b/lib/xray/tests/unit/fdr_logging_test.cc
@@ -17,8 +17,10 @@
#include <iostream>
#include <sys/mman.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <sys/types.h>
#include <system_error>
+#include <thread>
#include <unistd.h>
#include "xray/xray_records.h"
@@ -34,14 +36,23 @@ struct ScopedFileCloserAndDeleter {
: Fd(Fd), Filename(Filename) {}
~ScopedFileCloserAndDeleter() {
+ if (Map)
+ munmap(Map, Size);
if (Fd) {
close(Fd);
unlink(Filename);
}
}
+ void registerMap(void *M, size_t S) {
+ Map = M;
+ Size = S;
+ }
+
int Fd;
const char *Filename;
+ void *Map = nullptr;
+ size_t Size = 0;
};
TEST(FDRLoggingTest, Simple) {
@@ -51,7 +62,7 @@ TEST(FDRLoggingTest, Simple) {
Options.Fd = mkstemp(TmpFilename);
ASSERT_NE(Options.Fd, -1);
ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options,
- sizeof(FDRLoggingOptions)),
+ sizeof(FDRLoggingOptions)),
XRayLogInitStatus::XRAY_LOG_INITIALIZED);
fdrLoggingHandleArg0(1, XRayEntryType::ENTRY);
fdrLoggingHandleArg0(1, XRayEntryType::EXIT);
@@ -67,20 +78,25 @@ TEST(FDRLoggingTest, Simple) {
auto Size = lseek(Fd, 0, SEEK_END);
ASSERT_NE(Size, 0);
// Map the file contents.
- const char *Contents = static_cast<const char *>(
- mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0));
+ void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
+ const char *Contents = static_cast<const char *>(Map);
+ Guard.registerMap(Map, Size);
ASSERT_NE(Contents, nullptr);
XRayFileHeader H;
memcpy(&H, Contents, sizeof(XRayFileHeader));
- ASSERT_EQ(H.Version, 1);
+ ASSERT_EQ(H.Version, 2);
ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
- // We require one buffer at least to have the "start of buffer" metadata
- // record.
- MetadataRecord MDR;
- memcpy(&MDR, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
- ASSERT_EQ(MDR.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+ // We require one buffer at least to have the "extents" metadata record,
+ // followed by the NewBuffer record.
+ MetadataRecord MDR0, MDR1;
+ memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
+ memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
+ sizeof(MetadataRecord));
+ ASSERT_EQ(MDR0.RecordKind,
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents));
+ ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
}
TEST(FDRLoggingTest, Multiple) {
@@ -89,7 +105,7 @@ TEST(FDRLoggingTest, Multiple) {
Options.Fd = mkstemp(TmpFilename);
ASSERT_NE(Options.Fd, -1);
ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options,
- sizeof(FDRLoggingOptions)),
+ sizeof(FDRLoggingOptions)),
XRayLogInitStatus::XRAY_LOG_INITIALIZED);
for (uint64_t I = 0; I < 100; ++I) {
fdrLoggingHandleArg0(1, XRayEntryType::ENTRY);
@@ -107,18 +123,77 @@ TEST(FDRLoggingTest, Multiple) {
auto Size = lseek(Fd, 0, SEEK_END);
ASSERT_NE(Size, 0);
// Map the file contents.
- const char *Contents = static_cast<const char *>(
- mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0));
+ void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
+ const char *Contents = static_cast<const char *>(Map);
+ Guard.registerMap(Map, Size);
+ ASSERT_NE(Contents, nullptr);
+
+ XRayFileHeader H;
+ memcpy(&H, Contents, sizeof(XRayFileHeader));
+ ASSERT_EQ(H.Version, 2);
+ ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
+
+ MetadataRecord MDR0, MDR1;
+ memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
+ memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
+ sizeof(MetadataRecord));
+ ASSERT_EQ(MDR0.RecordKind,
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents));
+ ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+}
+
+TEST(FDRLoggingTest, MultiThreadedCycling) {
+ FDRLoggingOptions Options;
+ char TmpFilename[] = "fdr-logging-test.XXXXXX";
+ Options.Fd = mkstemp(TmpFilename);
+ ASSERT_NE(Options.Fd, -1);
+ ASSERT_EQ(fdrLoggingInit(kBufferSize, 1, &Options, sizeof(FDRLoggingOptions)),
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+
+ // Now we want to create one thread, do some logging, then create another one,
+ // in succession and making sure that we're able to get thread records from
+ // the latest thread (effectively being able to recycle buffers).
+ std::array<pid_t, 2> Threads;
+ for (uint64_t I = 0; I < 2; ++I) {
+ std::thread t{[I, &Threads] {
+ fdrLoggingHandleArg0(I + 1, XRayEntryType::ENTRY);
+ fdrLoggingHandleArg0(I + 1, XRayEntryType::EXIT);
+ Threads[I] = syscall(SYS_gettid);
+ }};
+ t.join();
+ }
+ ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
+ ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+
+ // To do this properly, we have to close the file descriptor then re-open the
+ // file for reading this time.
+ ASSERT_EQ(close(Options.Fd), 0);
+ int Fd = open(TmpFilename, O_RDONLY);
+ ASSERT_NE(-1, Fd);
+ ScopedFileCloserAndDeleter Guard(Fd, TmpFilename);
+ auto Size = lseek(Fd, 0, SEEK_END);
+ ASSERT_NE(Size, 0);
+ // Map the file contents.
+ void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
+ const char *Contents = static_cast<const char *>(Map);
+ Guard.registerMap(Map, Size);
ASSERT_NE(Contents, nullptr);
XRayFileHeader H;
memcpy(&H, Contents, sizeof(XRayFileHeader));
- ASSERT_EQ(H.Version, 1);
+ ASSERT_EQ(H.Version, 2);
ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
- MetadataRecord MDR0;
+ MetadataRecord MDR0, MDR1;
memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
- ASSERT_EQ(MDR0.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+ memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
+ sizeof(MetadataRecord));
+ ASSERT_EQ(MDR0.RecordKind,
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents));
+ ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+ pid_t Latest = 0;
+ memcpy(&Latest, MDR1.Data, sizeof(pid_t));
+ ASSERT_EQ(Latest, Threads[1]);
}
} // namespace
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index 34bf3cfd8..a0018f6b0 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -21,24 +21,25 @@ using namespace __xray;
using namespace __sanitizer;
BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
- : BufferSize(B),
- Buffers(new BufferRep[N]()),
- BufferCount(N),
- Finalizing{0},
- OwnedBuffers(new void *[N]()),
- Next(Buffers),
- First(Buffers),
+ : BufferSize(B), Buffers(new BufferRep[N]()), BufferCount(N), Finalizing{0},
+ OwnedBuffers(new void *[N]()), Next(Buffers), First(Buffers),
LiveBuffers(0) {
for (size_t i = 0; i < N; ++i) {
auto &T = Buffers[i];
- void *Tmp = InternalAlloc(BufferSize);
+ void *Tmp = InternalAlloc(BufferSize, nullptr, 64);
if (Tmp == nullptr) {
Success = false;
return;
}
+ void *Extents = InternalAlloc(sizeof(BufferExtents), nullptr, 64);
+ if (Extents == nullptr) {
+ Success = false;
+ return;
+ }
auto &Buf = T.Buff;
Buf.Buffer = Tmp;
Buf.Size = B;
+ Buf.Extents = reinterpret_cast<BufferExtents *>(Extents);
OwnedBuffers[i] = Tmp;
}
Success = true;
@@ -48,14 +49,17 @@ BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
return ErrorCode::QueueFinalizing;
__sanitizer::SpinMutexLock Guard(&Mutex);
- if (LiveBuffers == BufferCount) return ErrorCode::NotEnoughMemory;
+ if (LiveBuffers == BufferCount)
+ return ErrorCode::NotEnoughMemory;
auto &T = *Next;
auto &B = T.Buff;
Buf = B;
+ T.Used = true;
++LiveBuffers;
- if (++Next == (Buffers + BufferCount)) Next = Buffers;
+ if (++Next == (Buffers + BufferCount))
+ Next = Buffers;
return ErrorCode::Ok;
}
@@ -69,13 +73,15 @@ BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
break;
}
}
- if (!Found) return ErrorCode::UnrecognizedBuffer;
+ if (!Found)
+ return ErrorCode::UnrecognizedBuffer;
__sanitizer::SpinMutexLock Guard(&Mutex);
// This points to a semantic bug, we really ought to not be releasing more
// buffers than we actually get.
- if (LiveBuffers == 0) return ErrorCode::NotEnoughMemory;
+ if (LiveBuffers == 0)
+ return ErrorCode::NotEnoughMemory;
// Now that the buffer has been released, we mark it as "used".
First->Buff = Buf;
@@ -83,7 +89,8 @@ BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
Buf.Buffer = nullptr;
Buf.Size = 0;
--LiveBuffers;
- if (++First == (Buffers + BufferCount)) First = Buffers;
+ if (++First == (Buffers + BufferCount))
+ First = Buffers;
return ErrorCode::Ok;
}
@@ -100,6 +107,7 @@ BufferQueue::~BufferQueue() {
auto &T = *I;
auto &Buf = T.Buff;
InternalFree(Buf.Buffer);
+ InternalFree(Buf.Extents);
}
delete[] Buffers;
delete[] OwnedBuffers;
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
index 9257657a3..1ceb58274 100644
--- a/lib/xray/xray_buffer_queue.h
+++ b/lib/xray/xray_buffer_queue.h
@@ -28,9 +28,14 @@ namespace __xray {
/// trace collection.
class BufferQueue {
public:
+ struct alignas(64) BufferExtents {
+ __sanitizer::atomic_uint64_t Size;
+ };
+
struct Buffer {
void *Buffer = nullptr;
size_t Size = 0;
+ BufferExtents* Extents;
};
private:
diff --git a/lib/xray/xray_fdr_log_records.h b/lib/xray/xray_fdr_log_records.h
index f475e81cf..324208db8 100644
--- a/lib/xray/xray_fdr_log_records.h
+++ b/lib/xray/xray_fdr_log_records.h
@@ -31,7 +31,9 @@ struct alignas(16) MetadataRecord {
WalltimeMarker,
CustomEventMarker,
CallArgument,
+ BufferExtents,
};
+
// Use 7 bits to identify this record type.
/* RecordKinds */ uint8_t RecordKind : 7;
char Data[15];
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index cf27acc24..035fc4061 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -15,9 +15,9 @@
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
+#include <errno.h>
#include <sys/syscall.h>
#include <sys/time.h>
-#include <errno.h>
#include <time.h>
#include <unistd.h>
@@ -35,10 +35,7 @@
namespace __xray {
// Global BufferQueue.
-// NOTE: This is a pointer to avoid having to do atomic operations at
-// initialization time. This is OK to leak as there will only be one bufferqueue
-// for the runtime, initialized once through the fdrInit(...) sequence.
-std::shared_ptr<BufferQueue> *BQ = nullptr;
+BufferQueue *BQ = nullptr;
__sanitizer::atomic_sint32_t LogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
@@ -51,19 +48,37 @@ __sanitizer::SpinMutex FDROptionsMutex;
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
if (__sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire) !=
- XRayLogInitStatus::XRAY_LOG_FINALIZED)
+ XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+ if (__sanitizer::Verbosity())
+ Report("Not flushing log, implementation is not finalized.\n");
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
if (!__sanitizer::atomic_compare_exchange_strong(
&LogFlushStatus, &Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+
+ if (__sanitizer::Verbosity())
+ Report("Not flushing log, implementation is still finalizing.\n");
return static_cast<XRayLogFlushStatus>(Result);
+ }
- // Make a copy of the BufferQueue pointer to prevent other threads that may be
- // resetting it from blowing away the queue prematurely while we're dealing
- // with it.
- auto LocalBQ = *BQ;
+ if (BQ == nullptr) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot flush when global buffer queue is null.\n");
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ // We wait a number of microseconds to allow threads to see that we've
+ // finalised before attempting to flush the log.
+ struct timespec TS;
+ TS.tv_sec = flags()->xray_fdr_log_grace_period_us / 1000000;
+ TS.tv_nsec = (flags()->xray_fdr_log_grace_period_us % 1000000) * 1000;
+ struct timespec Rem;
+ while (clock_nanosleep(CLOCK_REALTIME, 0, &TS, &Rem) &&
+ (Rem.tv_sec != 0 || Rem.tv_nsec != 0))
+ TS = Rem;
// We write out the file in the following format:
//
@@ -94,34 +109,44 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
XRayFileHeader Header;
- Header.Version = 1;
+
+ // Version 2 of the log writes the extents of the buffer, instead of relying
+ // on an end-of-buffer record.
+ Header.Version = 2;
Header.Type = FileTypes::FDR_LOG;
Header.CycleFrequency = CycleFrequency;
+
// FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
// before setting the values in the header.
Header.ConstantTSC = 1;
Header.NonstopTSC = 1;
- Header.FdrData = FdrAdditionalHeaderData{LocalBQ->ConfiguredBufferSize()};
+ Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
- LocalBQ->apply([&](const BufferQueue::Buffer &B) {
- uint64_t BufferSize = B.Size;
- if (BufferSize > 0) {
+ BQ->apply([&](const BufferQueue::Buffer &B) {
+ // Starting at version 2 of the FDR logging implementation, we only write
+ // the records identified by the extents of the buffer. We use the Extents
+ // from the Buffer and write that out as the first record in the buffer.
+ // We still use a Metadata record, but fill in the extents instead for the
+ // data.
+ MetadataRecord ExtentsRecord;
+ auto BufferExtents = __sanitizer::atomic_load(
+ &B.Extents->Size, __sanitizer::memory_order_acquire);
+ assert(BufferExtents <= B.Size);
+ ExtentsRecord.Type = uint8_t(RecordType::Metadata);
+ ExtentsRecord.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents);
+ std::memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
+ if (BufferExtents > 0) {
+ retryingWriteAll(Fd, reinterpret_cast<char *>(&ExtentsRecord),
+ reinterpret_cast<char *>(&ExtentsRecord) +
+ sizeof(MetadataRecord));
retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
- reinterpret_cast<char *>(B.Buffer) + B.Size);
+ reinterpret_cast<char *>(B.Buffer) + BufferExtents);
}
});
- // The buffer for this particular thread would have been finalised after
- // we've written everything to disk, and we'd lose the thread's trace.
- auto &TLD = __xray::__xray_fdr_internal::getThreadLocalData();
- if (TLD.Buffer.Buffer != nullptr) {
- __xray::__xray_fdr_internal::writeEOBMetadata();
- auto Start = reinterpret_cast<char *>(TLD.Buffer.Buffer);
- retryingWriteAll(Fd, Start, Start + TLD.Buffer.Size);
- }
-
__sanitizer::atomic_store(&LogFlushStatus,
XRayLogFlushStatus::XRAY_LOG_FLUSHED,
__sanitizer::memory_order_release);
@@ -133,12 +158,15 @@ XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
if (!__sanitizer::atomic_compare_exchange_strong(
&LoggingStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_FINALIZING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot finalize log, implementation not initialized.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
// Do special things to make the log finalize itself, and not allow any more
// operations to be performed until re-initialized.
- (*BQ)->finalize();
+ BQ->finalize();
__sanitizer::atomic_store(&LoggingStatus,
XRayLogInitStatus::XRAY_LOG_FINALIZED,
@@ -155,7 +183,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return static_cast<XRayLogInitStatus>(CurrentStatus);
// Release the in-memory buffer queue.
- BQ->reset();
+ delete BQ;
+ BQ = nullptr;
// Spin until the flushing status is flushed.
s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -173,8 +202,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
}
struct TSCAndCPU {
- uint64_t TSC;
- unsigned char CPU;
+ uint64_t TSC = 0;
+ unsigned char CPU = 0;
};
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
@@ -205,15 +234,15 @@ static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
void fdrLoggingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
auto TC = getTimestamp();
- __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC,
- TC.CPU, 0, clock_gettime, *BQ);
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0,
+ clock_gettime, BQ);
}
void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
uint64_t Arg) XRAY_NEVER_INSTRUMENT {
auto TC = getTimestamp();
- __xray_fdr_internal::processFunctionHook(
- FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ);
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg,
+ clock_gettime, BQ);
}
void fdrLoggingHandleCustomEvent(void *Event,
@@ -223,10 +252,8 @@ void fdrLoggingHandleCustomEvent(void *Event,
auto &TSC = TC.TSC;
auto &CPU = TC.CPU;
RecursionGuard Guard{Running};
- if (!Guard) {
- assert(Running && "RecursionGuard is buggy!");
+ if (!Guard)
return;
- }
if (EventSize > std::numeric_limits<int32_t>::max()) {
using Empty = struct {};
static Empty Once = [&] {
@@ -238,7 +265,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
}
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
auto &TLD = getThreadLocalData();
- if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, clock_gettime))
+ if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, clock_gettime))
return;
// Here we need to prepare the log to handle:
@@ -246,7 +273,7 @@ void fdrLoggingHandleCustomEvent(void *Event,
// - The additional data we're going to write. Currently, that's the size of
// the event we're going to dump into the log as free-form bytes.
if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) {
- TLD.LocalBQ = nullptr;
+ TLD.BQ = nullptr;
return;
}
@@ -264,21 +291,29 @@ void fdrLoggingHandleCustomEvent(void *Event,
std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
TLD.RecordPtr += sizeof(CustomEvent);
std::memcpy(TLD.RecordPtr, Event, ReducedEventSize);
+ incrementExtents(MetadataRecSize + EventSize);
endBufferIfFull();
}
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
- if (OptionsSize != sizeof(FDRLoggingOptions))
+ if (OptionsSize != sizeof(FDRLoggingOptions)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot initialize FDR logging; wrong size for options: %d\n",
+ OptionsSize);
return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
&LoggingStatus, __sanitizer::memory_order_acquire));
+ }
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!__sanitizer::atomic_compare_exchange_strong(
&LoggingStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot initialize already initialized implementation.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
{
__sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
@@ -286,15 +321,37 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
}
bool Success = false;
+
+ if (BQ != nullptr) {
+ delete BQ;
+ BQ = nullptr;
+ }
+
if (BQ == nullptr)
- BQ = new std::shared_ptr<BufferQueue>();
+ BQ = new BufferQueue(BufferSize, BufferMax, Success);
- *BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
if (!Success) {
Report("BufferQueue init failed.\n");
+ if (BQ != nullptr) {
+ delete BQ;
+ BQ = nullptr;
+ }
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
+ static bool UNUSED Once = [] {
+ pthread_key_create(&__xray_fdr_internal::Key, +[](void *) {
+ auto &TLD = __xray_fdr_internal::getThreadLocalData();
+ if (TLD.BQ == nullptr)
+ return;
+ auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok)
+ Report("At thread exit, failed to release buffer at %p; error=%s\n",
+ TLD.Buffer.Buffer, BufferQueue::getErrorString(EC));
+ });
+ return false;
+ }();
+
// Arg1 handler should go in first to avoid concurrent code accidentally
// falling back to arg0 when it should have ran arg1.
__xray_set_handler_arg1(fdrLoggingHandleArg1);
@@ -305,7 +362,9 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
__sanitizer::atomic_store(&LoggingStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZED,
__sanitizer::memory_order_release);
- Report("XRay FDR init successful.\n");
+
+ if (__sanitizer::Verbosity())
+ Report("XRay FDR init successful.\n");
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
diff --git a/lib/xray/xray_fdr_logging_impl.h b/lib/xray/xray_fdr_logging_impl.h
index c109d1384..59eab55b2 100644
--- a/lib/xray/xray_fdr_logging_impl.h
+++ b/lib/xray/xray_fdr_logging_impl.h
@@ -24,11 +24,9 @@
#include <pthread.h>
#include <sys/syscall.h>
#include <time.h>
+#include <type_traits>
#include <unistd.h>
-// FIXME: Implement analogues to std::shared_ptr and std::weak_ptr
-#include <memory>
-
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
@@ -54,42 +52,19 @@ __sanitizer::atomic_sint32_t LoggingStatus = {
/// cooperation with xray_fdr_logging class, so be careful and think twice.
namespace __xray_fdr_internal {
-/// Writes the new buffer record and wallclock time that begin a buffer for a
-/// thread to MemPtr and increments MemPtr. Bypasses the thread local state
-/// machine and writes directly to memory without checks.
-static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr);
-
-/// Write a metadata record to switch to a new CPU to MemPtr and increments
-/// MemPtr. Bypasses the thread local state machine and writes directly to
-/// memory without checks.
-static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr);
-
-/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the
-/// thread local state machine and writes directly to memory without checks.
-static void writeEOBMetadata(char *&MemPtr);
+/// Writes the new buffer record and wallclock time that begin a buffer for the
+/// current thread.
+static void writeNewBufferPreamble(pid_t Tid, timespec TS);
-/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses
-/// the thread local state machine and directly writes to memory without checks.
-static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr);
-
-/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the
-/// thread local state machine and writes the function record directly to
-/// memory.
+/// Writes a Function Record to the buffer associated with the current thread.
static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
- XRayEntryType EntryType, char *&MemPtr);
+ XRayEntryType EntryType);
/// Sets up a new buffer in thread_local storage and writes a preamble. The
/// wall_clock_reader function is used to populate the WallTimeRecord entry.
static void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
struct timespec *));
-/// Called to record CPU time for a new CPU within the current thread.
-static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC);
-
-/// Called to close the buffer when the thread exhausts the buffer or when the
-/// thread exits (via a thread local variable destructor).
-static void writeEOBMetadata();
-
/// TSC Wrap records are written when a TSC delta encoding scheme overflows.
static void writeTSCWrapMetadata(uint64_t TSC);
@@ -119,15 +94,18 @@ struct alignas(64) ThreadLocalData {
// Make sure a thread that's ever called handleArg0 has a thread-local
// live reference to the buffer queue for this particular instance of
// FDRLogging, and that we're going to clean it up when the thread exits.
- std::shared_ptr<BufferQueue> LocalBQ = nullptr;
+ BufferQueue *BQ = nullptr;
};
-// Forward-declare, defined later.
-static ThreadLocalData &getThreadLocalData();
+static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
+ "ThreadLocalData must be trivially destructible");
static constexpr auto MetadataRecSize = sizeof(MetadataRecord);
static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+// Use a global pthread key to identify thread-local data for logging.
+static pthread_key_t Key;
+
// This function will initialize the thread-local data structure used by the FDR
// logging implementation and return a reference to it. The implementation
// details require a bit of care to maintain.
@@ -149,15 +127,9 @@ static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
// ThreadLocalData struct. This data will be uninitialized memory by
// design.
//
-// 2. Using pthread_once(...) to initialize the thread-local data structures
-// on first use, for every thread. We don't use std::call_once so we don't
-// have a reliance on the C++ runtime library.
-//
-// 3. Registering a cleanup function that gets run at the end of a thread's
-// lifetime through pthread_create_key(...). The cleanup function would
-// allow us to release the thread-local resources in a manner that would
-// let the rest of the XRay runtime implementation handle the records
-// written for this thread's active buffer.
+// 2. Not requiring a thread exit handler/implementation, keeping the
+// thread-local as purely a collection of references/data that do not
+// require cleanup.
//
// We're doing this to avoid using a `thread_local` object that has a
// non-trivial destructor, because the C++ runtime might call std::malloc(...)
@@ -168,55 +140,15 @@ static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
// critical section, calling a function that might be XRay instrumented (and
// thus in turn calling into malloc by virtue of registration of the
// thread_local's destructor).
-//
-// With the approach taken where, we attempt to avoid the potential for
-// deadlocks by relying instead on pthread's memory management routines.
static ThreadLocalData &getThreadLocalData() {
- thread_local pthread_key_t key;
-
- // We need aligned, uninitialized storage for the TLS object which is
- // trivially destructible. We're going to use this as raw storage and
- // placement-new the ThreadLocalData object into it later.
- alignas(alignof(ThreadLocalData)) thread_local unsigned char
- TLSBuffer[sizeof(ThreadLocalData)];
-
- // Ensure that we only actually ever do the pthread initialization once.
- thread_local bool UNUSED Unused = [] {
- new (&TLSBuffer) ThreadLocalData();
- auto result = pthread_key_create(&key, +[](void *) {
- auto &TLD = *reinterpret_cast<ThreadLocalData *>(&TLSBuffer);
- auto &RecordPtr = TLD.RecordPtr;
- auto &Buffers = TLD.LocalBQ;
- auto &Buffer = TLD.Buffer;
- if (RecordPtr == nullptr)
- return;
-
- // We make sure that upon exit, a thread will write out the EOB
- // MetadataRecord in the thread-local log, and also release the buffer
- // to the queue.
- assert((RecordPtr + MetadataRecSize) -
- static_cast<char *>(Buffer.Buffer) >=
- static_cast<ptrdiff_t>(MetadataRecSize));
- if (Buffers) {
- writeEOBMetadata();
- auto EC = Buffers->releaseBuffer(Buffer);
- if (EC != BufferQueue::ErrorCode::Ok)
- Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
- BufferQueue::getErrorString(EC));
- Buffers = nullptr;
- return;
- }
- });
- if (result != 0) {
- Report("Failed to allocate thread-local data through pthread; error=%d",
- result);
- return false;
- }
- pthread_setspecific(key, &TLSBuffer);
- return true;
+ static_assert(alignof(ThreadLocalData) >= 64,
+ "ThreadLocalData must be cache line aligned.");
+ thread_local ThreadLocalData TLD;
+ thread_local bool UNUSED ThreadOnce = [] {
+ pthread_setspecific(Key, &TLD);
+ return false;
}();
-
- return *reinterpret_cast<ThreadLocalData *>(TLSBuffer);
+ return TLD;
}
//-----------------------------------------------------------------------------|
@@ -253,27 +185,27 @@ public:
} // namespace
-inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+static void writeNewBufferPreamble(pid_t Tid,
+ timespec TS) XRAY_NEVER_INSTRUMENT {
static constexpr int InitRecordsCount = 2;
- alignas(alignof(MetadataRecord)) unsigned char
- Records[InitRecordsCount * MetadataRecSize];
+ auto &TLD = getThreadLocalData();
+ MetadataRecord Metadata[InitRecordsCount];
{
// Write out a MetadataRecord to signify that this is the start of a new
// buffer, associated with a particular thread, with a new CPU. For the
// data, we have 15 bytes to squeeze as much information as we can. At this
// point we only write down the following bytes:
// - Thread ID (pid_t, 4 bytes)
- auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(Records);
+ auto &NewBuffer = Metadata[0];
NewBuffer.Type = uint8_t(RecordType::Metadata);
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
}
+
// Also write the WalltimeMarker record.
{
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
- auto &WalltimeMarker =
- *reinterpret_cast<MetadataRecord *>(Records + MetadataRecSize);
+ auto &WalltimeMarker = Metadata[1];
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
WalltimeMarker.RecordKind =
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
@@ -286,30 +218,47 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
}
- std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
- MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
- auto &TLD = getThreadLocalData();
+
TLD.NumConsecutiveFnEnters = 0;
TLD.NumTailCalls = 0;
+ if (TLD.BQ == nullptr || TLD.BQ->finalizing())
+ return;
+ std::memcpy(TLD.RecordPtr, Metadata, sizeof(Metadata));
+ TLD.RecordPtr += sizeof(Metadata);
+ // Since we write out the extents as the first metadata record of the
+ // buffer, we need to write out the extents including the extents record.
+ __sanitizer::atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
+ __sanitizer::memory_order_release);
}
inline void setupNewBuffer(int (*wall_clock_reader)(
clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
auto &TLD = getThreadLocalData();
- auto &Buffer = TLD.Buffer;
- auto &RecordPtr = TLD.RecordPtr;
- RecordPtr = static_cast<char *>(Buffer.Buffer);
+ auto &B = TLD.Buffer;
+ TLD.RecordPtr = static_cast<char *>(B.Buffer);
pid_t Tid = syscall(SYS_gettid);
timespec TS{0, 0};
// This is typically clock_gettime, but callers have injection ability.
wall_clock_reader(CLOCK_MONOTONIC, &TS);
- writeNewBufferPreamble(Tid, TS, RecordPtr);
+ writeNewBufferPreamble(Tid, TS);
TLD.NumConsecutiveFnEnters = 0;
TLD.NumTailCalls = 0;
}
-inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+static void incrementExtents(size_t Add) {
+ auto &TLD = getThreadLocalData();
+ __sanitizer::atomic_fetch_add(&TLD.Buffer.Extents->Size, Add,
+ __sanitizer::memory_order_acq_rel);
+}
+
+static void decrementExtents(size_t Subtract) {
+ auto &TLD = getThreadLocalData();
+ __sanitizer::atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract,
+ __sanitizer::memory_order_acq_rel);
+}
+
+inline void writeNewCPUIdMetadata(uint16_t CPU,
+ uint64_t TSC) XRAY_NEVER_INSTRUMENT {
auto &TLD = getThreadLocalData();
MetadataRecord NewCPUId;
NewCPUId.Type = uint8_t(RecordType::Metadata);
@@ -321,35 +270,14 @@ inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
// Total = 10 bytes.
std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
- std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
- MemPtr += sizeof(MetadataRecord);
- TLD.NumConsecutiveFnEnters = 0;
- TLD.NumTailCalls = 0;
-}
-
-inline void writeNewCPUIdMetadata(uint16_t CPU,
- uint64_t TSC) XRAY_NEVER_INSTRUMENT {
- writeNewCPUIdMetadata(CPU, TSC, getThreadLocalData().RecordPtr);
-}
-
-inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
- auto &TLD = getThreadLocalData();
- MetadataRecord EOBMeta;
- EOBMeta.Type = uint8_t(RecordType::Metadata);
- EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
- // For now we don't write any bytes into the Data field.
- std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
- MemPtr += sizeof(MetadataRecord);
+ std::memcpy(TLD.RecordPtr, &NewCPUId, sizeof(MetadataRecord));
+ TLD.RecordPtr += sizeof(MetadataRecord);
TLD.NumConsecutiveFnEnters = 0;
TLD.NumTailCalls = 0;
+ incrementExtents(sizeof(MetadataRecord));
}
-inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
- writeEOBMetadata(getThreadLocalData().RecordPtr);
-}
-
-inline void writeTSCWrapMetadata(uint64_t TSC,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
auto &TLD = getThreadLocalData();
MetadataRecord TSCWrap;
TSCWrap.Type = uint8_t(RecordType::Metadata);
@@ -359,14 +287,11 @@ inline void writeTSCWrapMetadata(uint64_t TSC,
// - Full TSC (uint64_t, 8 bytes)
// Total = 8 bytes.
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
- std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
- MemPtr += sizeof(MetadataRecord);
+ std::memcpy(TLD.RecordPtr, &TSCWrap, sizeof(MetadataRecord));
+ TLD.RecordPtr += sizeof(MetadataRecord);
TLD.NumConsecutiveFnEnters = 0;
TLD.NumTailCalls = 0;
-}
-
-inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
- writeTSCWrapMetadata(TSC, getThreadLocalData().RecordPtr);
+ incrementExtents(sizeof(MetadataRecord));
}
// Call Argument metadata records store the arguments to a function in the
@@ -380,11 +305,12 @@ static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
std::memcpy(CallArg.Data, &A, sizeof(A));
std::memcpy(TLD.RecordPtr, &CallArg, sizeof(MetadataRecord));
TLD.RecordPtr += sizeof(MetadataRecord);
+ incrementExtents(sizeof(MetadataRecord));
}
-static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
- XRayEntryType EntryType,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+static inline void
+writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT {
FunctionRecord FuncRecord;
FuncRecord.Type = uint8_t(RecordType::Function);
// Only take 28 bits of the function id.
@@ -439,8 +365,9 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
}
}
- std::memcpy(MemPtr, &FuncRecord, sizeof(FunctionRecord));
- MemPtr += sizeof(FunctionRecord);
+ std::memcpy(TLD.RecordPtr, &FuncRecord, sizeof(FunctionRecord));
+ TLD.RecordPtr += sizeof(FunctionRecord);
+ incrementExtents(sizeof(FunctionRecord));
}
static uint64_t thresholdTicks() {
@@ -458,6 +385,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
auto &TLD = getThreadLocalData();
TLD.RecordPtr -= FunctionRecSize;
+ decrementExtents(FunctionRecSize);
FunctionRecord FuncRecord;
std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
assert(FuncRecord.RecordKind ==
@@ -511,6 +439,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
TLD.RecordPtr -= 2 * FunctionRecSize;
LastTSC = RewindingTSC;
+ decrementExtents(2 * FunctionRecSize);
}
}
@@ -531,12 +460,10 @@ inline bool prepareBuffer(uint64_t TSC, unsigned char CPU,
size_t MaxSize) XRAY_NEVER_INSTRUMENT {
auto &TLD = getThreadLocalData();
char *BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
- if ((TLD.RecordPtr + MaxSize) >
- (BufferStart + TLD.Buffer.Size - MetadataRecSize)) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(*TLD.LocalBQ))
+ if ((TLD.RecordPtr + MaxSize) > (BufferStart + TLD.Buffer.Size)) {
+ if (!releaseThreadLocalBuffer(*TLD.BQ))
return false;
- auto EC = TLD.LocalBQ->getBuffer(TLD.Buffer);
+ auto EC = TLD.BQ->getBuffer(TLD.Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
Report("Failed to acquire a buffer; error=%s\n",
BufferQueue::getErrorString(EC));
@@ -550,10 +477,10 @@ inline bool prepareBuffer(uint64_t TSC, unsigned char CPU,
return true;
}
-inline bool isLogInitializedAndReady(
- std::shared_ptr<BufferQueue> &LBQ, uint64_t TSC, unsigned char CPU,
- int (*wall_clock_reader)(clockid_t,
- struct timespec *)) XRAY_NEVER_INSTRUMENT {
+inline bool
+isLogInitializedAndReady(BufferQueue *LBQ, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *))
+ XRAY_NEVER_INSTRUMENT {
// Bail out right away if logging is not initialized yet.
// We should take the opportunity to release the buffer though.
auto Status = __sanitizer::atomic_load(&LoggingStatus,
@@ -563,11 +490,9 @@ inline bool isLogInitializedAndReady(
if (TLD.RecordPtr != nullptr &&
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
- writeEOBMetadata();
if (!releaseThreadLocalBuffer(*LBQ))
return false;
TLD.RecordPtr = nullptr;
- LBQ = nullptr;
return false;
}
return false;
@@ -577,7 +502,6 @@ inline bool isLogInitializedAndReady(
__sanitizer::memory_order_acquire) !=
XRayLogInitStatus::XRAY_LOG_INITIALIZED ||
LBQ->finalizing()) {
- writeEOBMetadata();
if (!releaseThreadLocalBuffer(*LBQ))
return false;
TLD.RecordPtr = nullptr;
@@ -650,9 +574,9 @@ inline uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC,
inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
auto &TLD = getThreadLocalData();
auto BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
- if ((TLD.RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(*TLD.LocalBQ))
+ if ((TLD.RecordPtr + MetadataRecSize) - BufferStart <=
+ ptrdiff_t{MetadataRecSize}) {
+ if (!releaseThreadLocalBuffer(*TLD.BQ))
return;
TLD.RecordPtr = nullptr;
}
@@ -666,10 +590,11 @@ thread_local volatile bool Running = false;
/// walk backward through its buffer and erase trivial functions to avoid
/// polluting the log and may use the buffer queue to obtain or release a
/// buffer.
-inline void processFunctionHook(
- int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
- uint64_t Arg1, int (*wall_clock_reader)(clockid_t, struct timespec *),
- const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+inline void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
+ uint64_t TSC, unsigned char CPU, uint64_t Arg1,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *),
+ BufferQueue *BQ) XRAY_NEVER_INSTRUMENT {
// Prevent signal handler recursion, so in case we're already in a log writing
// mode and the signal handler comes in (and is also instrumented) then we
// don't want to be clobbering potentially partial writes already happening in
@@ -685,10 +610,10 @@ inline void processFunctionHook(
// In case the reference has been cleaned up before, we make sure we
// initialize it to the provided BufferQueue.
- if (TLD.LocalBQ == nullptr)
- TLD.LocalBQ = BQ;
+ if (TLD.BQ == nullptr)
+ TLD.BQ = BQ;
- if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, wall_clock_reader))
+ if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader))
return;
// Before we go setting up writing new function entries, we need to be really
@@ -720,16 +645,13 @@ inline void processFunctionHook(
// id" MetadataRecord before writing out the actual FunctionRecord.
// 4. The second MetadataRecord is the optional function call argument.
//
- // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
- //
- // So the math we need to do is to determine whether writing 24 bytes past the
- // current pointer leaves us with enough bytes to write the EOB
- // MetadataRecord. If we don't have enough space after writing as much as 24
- // bytes in the end of the buffer, we need to write out the EOB, get a new
- // Buffer, set it up properly before doing any further writing.
+ // So the math we need to do is to determine whether writing 40 bytes past the
+ // current pointer exceeds the buffer's maximum size. If we don't have enough
+ // space to write 40 bytes in the buffer, we need get a new Buffer, set it up
+ // properly before doing any further writing.
size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize;
if (!prepareBuffer(TSC, CPU, wall_clock_reader, MaxSize)) {
- TLD.LocalBQ = nullptr;
+ TLD.BQ = nullptr;
return;
}
@@ -768,7 +690,7 @@ inline void processFunctionHook(
}
}
- writeFunctionRecord(FuncId, RecordTSCDelta, Entry, TLD.RecordPtr);
+ writeFunctionRecord(FuncId, RecordTSCDelta, Entry);
if (Entry == XRayEntryType::LOG_ARGS_ENTRY)
writeCallArgumentMetadata(Arg1);
diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc
index 7ddce78eb..890f6fda2 100644
--- a/lib/xray/xray_flags.inc
+++ b/lib/xray/xray_flags.inc
@@ -25,3 +25,7 @@ XRAY_FLAG(bool, xray_fdr_log, false,
XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
"FDR logging will try to skip functions that execute for fewer "
"microseconds than this threshold.")
+XRAY_FLAG(int, xray_fdr_log_grace_period_us, 100000,
+ "FDR logging will wait this much time in microseconds before "
+ "actually flushing the log; this gives a chance for threads to "
+ "notice that the log has been finalized and clean up.")