summaryrefslogtreecommitdiff
path: root/lib/xray
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2017-05-12 01:07:41 +0000
committerDean Michael Berris <dberris@google.com>2017-05-12 01:07:41 +0000
commitf9947480f58b10f2460df24287f95357082b6563 (patch)
treed9b7f4f86d716227a89c4a94bb3cc1e79af3559e /lib/xray
parenta5c91c22f175eddf6f2d4d244dce6ce6a2e3c9e1 (diff)
[XRay][compiler-rt] Runtime changes to support custom event logging
Summary: This change implements support for the custom event logging sleds and intrinsics at runtime. For now it only supports handling the sleds in x86_64, with the implementations for other architectures stubbed out to do nothing. NOTE: Work in progress, uploaded for exposition/exploration purposes. Depends on D27503, D30018, and D33032. Reviewers: echristo, javed.absar, timshen Subscribers: mehdi_amini, nemanjai, llvm-commits Differential Revision: https://reviews.llvm.org/D30630 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@302857 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/xray')
-rw-r--r--lib/xray/xray_AArch64.cc13
-rw-r--r--lib/xray/xray_arm.cc12
-rw-r--r--lib/xray/xray_fdr_log_records.h1
-rw-r--r--lib/xray/xray_fdr_logging.cc150
-rw-r--r--lib/xray/xray_fdr_logging_impl.h223
-rw-r--r--lib/xray/xray_interface.cc21
-rw-r--r--lib/xray/xray_interface_internal.h2
-rw-r--r--lib/xray/xray_mips.cc9
-rw-r--r--lib/xray/xray_mips64.cc8
-rw-r--r--lib/xray/xray_powerpc64.cc6
-rw-r--r--lib/xray/xray_trampoline_x86_64.S46
-rw-r--r--lib/xray/xray_x86_64.cc36
12 files changed, 387 insertions, 140 deletions
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
index 8d1c7c5d8..372c1ad87 100644
--- a/lib/xray/xray_AArch64.cc
+++ b/lib/xray/xray_AArch64.cc
@@ -18,8 +18,7 @@
#include <atomic>
#include <cassert>
-
-extern "C" void __clear_cache(void* start, void* end);
+extern "C" void __clear_cache(void *start, void *end);
namespace __xray {
@@ -86,8 +85,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
}
- __clear_cache(reinterpret_cast<char*>(FirstAddress),
- reinterpret_cast<char*>(CurAddress));
+ __clear_cache(reinterpret_cast<char *>(FirstAddress),
+ reinterpret_cast<char *>(CurAddress));
return true;
}
@@ -107,6 +106,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySled &Sled)
+ XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64?
+ return false;
+}
+
// FIXME: Maybe implement this better?
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
index 26d673ec2..94e1c2638 100644
--- a/lib/xray/xray_arm.cc
+++ b/lib/xray/xray_arm.cc
@@ -18,7 +18,7 @@
#include <atomic>
#include <cassert>
-extern "C" void __clear_cache(void* start, void* end);
+extern "C" void __clear_cache(void *start, void *end);
namespace __xray {
@@ -122,8 +122,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
}
- __clear_cache(reinterpret_cast<char*>(FirstAddress),
- reinterpret_cast<char*>(CurAddress));
+ __clear_cache(reinterpret_cast<char *>(FirstAddress),
+ reinterpret_cast<char *>(CurAddress));
return true;
}
@@ -143,6 +143,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySled &Sled)
+ XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm?
+ return false;
+}
+
// FIXME: Maybe implement this better?
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
diff --git a/lib/xray/xray_fdr_log_records.h b/lib/xray/xray_fdr_log_records.h
index 36d9410d1..3d6d38892 100644
--- a/lib/xray/xray_fdr_log_records.h
+++ b/lib/xray/xray_fdr_log_records.h
@@ -29,6 +29,7 @@ struct alignas(16) MetadataRecord {
NewCPUId,
TSCWrap,
WalltimeMarker,
+ CustomEventMarker,
};
// Use 7 bits to identify this record type.
/* RecordKinds */ uint8_t RecordKind : 7;
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index e538b477a..a7e1382c3 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -41,45 +41,12 @@ namespace __xray {
// Global BufferQueue.
std::shared_ptr<BufferQueue> BQ;
-__sanitizer::atomic_sint32_t LoggingStatus = {
- XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
-
__sanitizer::atomic_sint32_t LogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-std::unique_ptr<FDRLoggingOptions> FDROptions;
-
-XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
- void *Options,
- size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
- if (OptionsSize != sizeof(FDRLoggingOptions))
- return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
- &LoggingStatus, __sanitizer::memory_order_acquire));
- s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
- if (!__sanitizer::atomic_compare_exchange_strong(
- &LoggingStatus, &CurrentStatus,
- XRayLogInitStatus::XRAY_LOG_INITIALIZING,
- __sanitizer::memory_order_release))
- return static_cast<XRayLogInitStatus>(CurrentStatus);
-
- FDROptions.reset(new FDRLoggingOptions());
- memcpy(FDROptions.get(), Options, OptionsSize);
- bool Success = false;
- BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
- if (!Success) {
- Report("BufferQueue init failed.\n");
- return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
- }
-
- // Install the actual handleArg0 handler after initialising the buffers.
- __xray_set_handler(fdrLoggingHandleArg0);
+FDRLoggingOptions FDROptions;
- __sanitizer::atomic_store(&LoggingStatus,
- XRayLogInitStatus::XRAY_LOG_INITIALIZED,
- __sanitizer::memory_order_release);
- Report("XRay FDR init successful.\n");
- return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
-}
+__sanitizer::SpinMutex FDROptionsMutex;
// Must finalize before flushing.
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
@@ -108,7 +75,11 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
// (fixed-sized) and let the tools reading the buffers deal with the data
// afterwards.
//
- int Fd = FDROptions->Fd;
+ int Fd = -1;
+ {
+ __sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
+ Fd = FDROptions.Fd;
+ }
if (Fd == -1)
Fd = getLogFD();
if (Fd == -1) {
@@ -120,8 +91,8 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
// Test for required CPU features and cache the cycle frequency
static bool TSCSupported = probeRequiredCPUFeatures();
- static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency()
- : __xray::NanosecondsPerSecond;
+ static uint64_t CycleFrequency =
+ TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
XRayFileHeader Header;
Header.Version = 1;
@@ -192,8 +163,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
-void fdrLoggingHandleArg0(int32_t FuncId,
- XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+static std::tuple<uint64_t, unsigned char>
+getTimestamp() XRAY_NEVER_INSTRUMENT {
// We want to get the TSC as early as possible, so that we can check whether
// we've seen this CPU before. We also do it before we load anything else, to
// allow for forward progress with the scheduling.
@@ -203,7 +174,7 @@ void fdrLoggingHandleArg0(int32_t FuncId,
// Test once for required CPU features
static bool TSCSupported = probeRequiredCPUFeatures();
- if(TSCSupported) {
+ if (TSCSupported) {
TSC = __xray::readTSC(CPU);
} else {
// FIXME: This code needs refactoring as it appears in multiple locations
@@ -216,9 +187,102 @@ void fdrLoggingHandleArg0(int32_t FuncId,
CPU = 0;
TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
}
+ return std::make_tuple(TSC, CPU);
+}
+
+void fdrLoggingHandleArg0(int32_t FuncId,
+ XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+ auto TSC_CPU = getTimestamp();
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
+ std::get<1>(TSC_CPU), clock_gettime,
+ LoggingStatus, BQ);
+}
- __xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU,
- clock_gettime, LoggingStatus, BQ);
+void fdrLoggingHandleCustomEvent(void *Event,
+ std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
+ using namespace __xray_fdr_internal;
+ auto TSC_CPU = getTimestamp();
+ auto &TSC = std::get<0>(TSC_CPU);
+ auto &CPU = std::get<1>(TSC_CPU);
+ thread_local bool Running = false;
+ RecursionGuard Guard{Running};
+ if (!Guard) {
+ assert(Running && "RecursionGuard is buggy!");
+ return;
+ }
+ if (EventSize > std::numeric_limits<int32_t>::max()) {
+ using Empty = struct {};
+ static Empty Once = [&] {
+ Report("Event size too large = %zu ; > max = %d\n", EventSize,
+ std::numeric_limits<int32_t>::max());
+ return Empty();
+ }();
+ (void)Once;
+ }
+ int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
+ if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, clock_gettime))
+ return;
+
+ // Here we need to prepare the log to handle:
+ // - The metadata record we're going to write. (16 bytes)
+ // - The additional data we're going to write. Currently, that's the size of
+ // the event we're going to dump into the log as free-form bytes.
+ if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) {
+ LocalBQ = nullptr;
+ return;
+ }
+
+ // Write the custom event metadata record, which consists of the following
+ // information:
+ // - 8 bytes (64-bits) for the full TSC when the event started.
+ // - 4 bytes (32-bits) for the length of the data.
+ MetadataRecord CustomEvent;
+ CustomEvent.Type = uint8_t(RecordType::Metadata);
+ CustomEvent.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
+ constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
+ std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
+ std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
+ std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent));
+ RecordPtr += sizeof(CustomEvent);
+ std::memcpy(RecordPtr, Event, ReducedEventSize);
+ endBufferIfFull();
+}
+
+XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
+ void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ if (OptionsSize != sizeof(FDRLoggingOptions))
+ return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
+ &LoggingStatus, __sanitizer::memory_order_acquire));
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &LoggingStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+ __sanitizer::memory_order_release))
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ {
+ __sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
+ memcpy(&FDROptions, Options, OptionsSize);
+ }
+
+ bool Success = false;
+ BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
+ if (!Success) {
+ Report("BufferQueue init failed.\n");
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ // Install the actual handleArg0 handler after initialising the buffers.
+ __xray_set_handler(fdrLoggingHandleArg0);
+ __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
+
+ __sanitizer::atomic_store(&LoggingStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+ __sanitizer::memory_order_release);
+ Report("XRay FDR init successful.\n");
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
} // namespace __xray
diff --git a/lib/xray/xray_fdr_logging_impl.h b/lib/xray/xray_fdr_logging_impl.h
index ce360cb03..c801babc3 100644
--- a/lib/xray/xray_fdr_logging_impl.h
+++ b/lib/xray/xray_fdr_logging_impl.h
@@ -37,6 +37,9 @@
namespace __xray {
+__sanitizer::atomic_sint32_t LoggingStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
/// We expose some of the state transitions when FDR logging mode is operating
/// such that we can simulate a series of log events that may occur without
/// and test with determinism without worrying about the real CPU time.
@@ -123,12 +126,21 @@ thread_local uint8_t NumTailCalls = 0;
constexpr auto MetadataRecSize = sizeof(MetadataRecord);
constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+// We use a thread_local variable to keep track of which CPUs we've already
+// run, and the TSC times for these CPUs. This allows us to stop repeating the
+// CPU field in the function records.
+//
+// We assume that we'll support only 65536 CPUs for x86_64.
+thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
+thread_local uint64_t LastTSC = 0;
+thread_local uint64_t LastFunctionEntryTSC = 0;
+
class ThreadExitBufferCleanup {
- std::weak_ptr<BufferQueue> Buffers;
+ std::shared_ptr<BufferQueue> &Buffers;
BufferQueue::Buffer &Buffer;
public:
- explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
+ explicit ThreadExitBufferCleanup(std::shared_ptr<BufferQueue> &BQ,
BufferQueue::Buffer &Buffer)
XRAY_NEVER_INSTRUMENT : Buffers(BQ),
Buffer(Buffer) {}
@@ -142,17 +154,24 @@ public:
// the queue.
assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
static_cast<ptrdiff_t>(MetadataRecSize));
- if (auto BQ = Buffers.lock()) {
+ if (Buffers) {
writeEOBMetadata();
- auto EC = BQ->releaseBuffer(Buffer);
+ auto EC = Buffers->releaseBuffer(Buffer);
if (EC != BufferQueue::ErrorCode::Ok)
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
BufferQueue::getErrorString(EC));
+ Buffers = nullptr;
return;
}
}
};
+// Make sure a thread that's ever called handleArg0 has a thread-local
+// live reference to the buffer queue for this particular instance of
+// FDRLogging, and that we're going to clean it up when the thread exits.
+thread_local std::shared_ptr<BufferQueue> LocalBQ = nullptr;
+thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
+
class RecursionGuard {
bool &Running;
const bool Valid;
@@ -176,7 +195,7 @@ public:
}
};
-static inline bool loggingInitialized(
+inline bool loggingInitialized(
const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT {
return __sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire) ==
@@ -185,7 +204,7 @@ static inline bool loggingInitialized(
} // namespace
-static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
+inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
static constexpr int InitRecordsCount = 2;
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
@@ -222,9 +241,8 @@ static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
NumTailCalls = 0;
}
-static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
- struct timespec *))
- XRAY_NEVER_INSTRUMENT {
+inline void setupNewBuffer(int (*wall_clock_reader)(
+ clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
RecordPtr = static_cast<char *>(Buffer.Buffer);
pid_t Tid = syscall(SYS_gettid);
timespec TS{0, 0};
@@ -235,7 +253,7 @@ static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
NumTailCalls = 0;
}
-static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
+inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
MetadataRecord NewCPUId;
NewCPUId.Type = uint8_t(RecordType::Metadata);
@@ -253,12 +271,12 @@ static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
NumTailCalls = 0;
}
-static inline void writeNewCPUIdMetadata(uint16_t CPU,
+inline void writeNewCPUIdMetadata(uint16_t CPU,
uint64_t TSC) XRAY_NEVER_INSTRUMENT {
writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
}
-static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
MetadataRecord EOBMeta;
EOBMeta.Type = uint8_t(RecordType::Metadata);
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
@@ -269,11 +287,11 @@ static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
NumTailCalls = 0;
}
-static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
writeEOBMetadata(RecordPtr);
}
-static inline void writeTSCWrapMetadata(uint64_t TSC,
+inline void writeTSCWrapMetadata(uint64_t TSC,
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
MetadataRecord TSCWrap;
TSCWrap.Type = uint8_t(RecordType::Metadata);
@@ -289,11 +307,11 @@ static inline void writeTSCWrapMetadata(uint64_t TSC,
NumTailCalls = 0;
}
-static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
writeTSCWrapMetadata(TSC, RecordPtr);
}
-static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
XRayEntryType EntryType,
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
@@ -339,6 +357,17 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
FuncRecord.RecordKind =
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
break;
+ case XRayEntryType::CUSTOM_EVENT: {
+ // This is a bug in patching, so we'll report it once and move on.
+ static bool Once = [&] {
+ Report("Internal error: patched an XRay custom event call as a function; "
+ "func id = %d\n",
+ FuncId);
+ return true;
+ }();
+ (void)Once;
+ return;
+ }
}
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
@@ -346,8 +375,9 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
}
static uint64_t thresholdTicks() {
- static uint64_t TicksPerSec = probeRequiredCPUFeatures() ? getTSCFrequency() :
- __xray::NanosecondsPerSecond;
+ static uint64_t TicksPerSec = probeRequiredCPUFeatures()
+ ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond;
static const uint64_t ThresholdTicks =
TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000;
return ThresholdTicks;
@@ -397,9 +427,8 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedTailExit.TSCDelta;
AlignedFuncStorage FunctionEntryBuffer;
- const auto &ExpectedFunctionEntry =
- *reinterpret_cast<FunctionRecord *>(std::memcpy(
- &FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
+ const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
+ std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
assert(ExpectedFunctionEntry.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry when rewinding tail call.");
@@ -422,7 +451,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
}
}
-static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
+inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
auto EC = BQ->releaseBuffer(Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
@@ -432,11 +461,29 @@ static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
return true;
}
-static inline void processFunctionHook(
- int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
- int (*wall_clock_reader)(clockid_t, struct timespec *),
- __sanitizer::atomic_sint32_t &LoggingStatus,
- const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
+ struct timespec *),
+ size_t MaxSize) XRAY_NEVER_INSTRUMENT {
+ char *BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
+ auto EC = LocalBQ->getBuffer(Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok) {
+ Report("Failed to acquire a buffer; error=%s\n",
+ BufferQueue::getErrorString(EC));
+ return false;
+ }
+ setupNewBuffer(wall_clock_reader);
+ }
+ return true;
+}
+
+inline bool isLogInitializedAndReady(
+ std::shared_ptr<BufferQueue> &LocalBQ, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *)) XRAY_NEVER_INSTRUMENT {
// Bail out right away if logging is not initialized yet.
// We should take the opportunity to release the buffer though.
auto Status = __sanitizer::atomic_load(&LoggingStatus,
@@ -446,44 +493,19 @@ static inline void processFunctionHook(
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
writeEOBMetadata();
- if (!releaseThreadLocalBuffer(BQ.get()))
- return;
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
RecordPtr = nullptr;
+ LocalBQ = nullptr;
+ return false;
}
- return;
- }
-
- // We use a thread_local variable to keep track of which CPUs we've already
- // run, and the TSC times for these CPUs. This allows us to stop repeating the
- // CPU field in the function records.
- //
- // We assume that we'll support only 65536 CPUs for x86_64.
- thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
- thread_local uint64_t LastTSC = 0;
- thread_local uint64_t LastFunctionEntryTSC = 0;
-
- // Make sure a thread that's ever called handleArg0 has a thread-local
- // live reference to the buffer queue for this particular instance of
- // FDRLogging, and that we're going to clean it up when the thread exits.
- thread_local auto LocalBQ = BQ;
- thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
-
- // Prevent signal handler recursion, so in case we're already in a log writing
- // mode and the signal handler comes in (and is also instrumented) then we
- // don't want to be clobbering potentially partial writes already happening in
- // the thread. We use a simple thread_local latch to only allow one on-going
- // handleArg0 to happen at any given time.
- thread_local bool Running = false;
- RecursionGuard Guard{Running};
- if (!Guard) {
- assert(Running == true && "RecursionGuard is buggy!");
- return;
+ return false;
}
if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
writeEOBMetadata();
- if (!releaseThreadLocalBuffer(BQ.get()))
- return;
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
RecordPtr = nullptr;
}
@@ -496,19 +518,57 @@ static inline void processFunctionHook(
LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
Report("Failed to acquire a buffer; error=%s\n",
BufferQueue::getErrorString(EC));
- return;
+ return false;
}
setupNewBuffer(wall_clock_reader);
}
if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
- // This means this is the first CPU this thread has ever run on. We set the
- // current CPU and record this as the first TSC we've seen.
+ // This means this is the first CPU this thread has ever run on. We set
+ // the current CPU and record this as the first TSC we've seen.
CurrentCPU = CPU;
writeNewCPUIdMetadata(CPU, TSC);
}
+ return true;
+} // namespace __xray_fdr_internal
+
+inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
+ auto BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return;
+ RecordPtr = nullptr;
+ }
+}
+
+inline void processFunctionHook(
+ int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *),
+ __sanitizer::atomic_sint32_t &LoggingStatus,
+ const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+ // Prevent signal handler recursion, so in case we're already in a log writing
+ // mode and the signal handler comes in (and is also instrumented) then we
+ // don't want to be clobbering potentially partial writes already happening in
+ // the thread. We use a simple thread_local latch to only allow one on-going
+ // handleArg0 to happen at any given time.
+ thread_local bool Running = false;
+ RecursionGuard Guard{Running};
+ if (!Guard) {
+ assert(Running == true && "RecursionGuard is buggy!");
+ return;
+ }
+
+ // In case the reference has been cleaned up before, we make sure we
+ // initialize it to the provided BufferQueue.
+ if (LocalBQ == nullptr)
+ LocalBQ = BQ;
+
+ if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader))
+ return;
+
// Before we go setting up writing new function entries, we need to be really
// careful about the pointer math we're doing. This means we need to ensure
// that the record we are about to write is going to fit into the buffer,
@@ -545,24 +605,14 @@ static inline void processFunctionHook(
// bytes in the end of the buffer, we need to write out the EOB, get a new
// Buffer, set it up properly before doing any further writing.
//
- char *BufferStart = static_cast<char *>(Buffer.Buffer);
- if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
- static_cast<ptrdiff_t>(MetadataRecSize)) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
- return;
- auto EC = LocalBQ->getBuffer(Buffer);
- if (EC != BufferQueue::ErrorCode::Ok) {
- Report("Failed to acquire a buffer; error=%s\n",
- BufferQueue::getErrorString(EC));
- return;
- }
- setupNewBuffer(wall_clock_reader);
+ if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
+ LocalBQ = nullptr;
+ return;
}
// By this point, we are now ready to write at most 24 bytes (one metadata
// record and one function record).
- BufferStart = static_cast<char *>(Buffer.Buffer);
+ auto BufferStart = static_cast<char *>(Buffer.Buffer);
assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
static_cast<ptrdiff_t>(MetadataRecSize) &&
"Misconfigured BufferQueue provided; Buffer size not large enough.");
@@ -586,7 +636,6 @@ static inline void processFunctionHook(
// FunctionRecord. In this case we write down just a FunctionRecord with
// the correct TSC delta.
//
-
uint32_t RecordTSCDelta = 0;
if (CPU != CurrentCPU) {
// We've moved to a new CPU.
@@ -619,21 +668,27 @@ static inline void processFunctionHook(
break;
rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId);
return; // without writing log.
+ case XRayEntryType::CUSTOM_EVENT: {
+ // This is a bug in patching, so we'll report it once and move on.
+ static bool Once = [&] {
+ Report("Internal error: patched an XRay custom event call as a function; "
+ "func id = %d",
+ FuncId);
+ return true;
+ }();
+ (void)Once;
+ return;
+ }
}
writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
// If we've exhausted the buffer by this time, we then release the buffer to
// make sure that other threads may start using this buffer.
- if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
- return;
- RecordPtr = nullptr;
- }
+ endBufferIfFull();
}
} // namespace __xray_fdr_internal
-
} // namespace __xray
+
#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
index 26f0ab122..c437a72e3 100644
--- a/lib/xray/xray_interface.cc
+++ b/lib/xray/xray_interface.cc
@@ -50,6 +50,9 @@ __sanitizer::atomic_uintptr_t XRayPatchedFunction{0};
// This is the function to call from the arg1-enabled sleds/trampolines.
__sanitizer::atomic_uintptr_t XRayArgLogger{0};
+// This is the function to call when we encounter a custom event log call.
+__sanitizer::atomic_uintptr_t XRayPatchedCustomEvent{0};
+
// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
// any successful mprotect(...) changes. This is used to make a page writeable
// and executable, and upon destruction if it was successful in doing so returns
@@ -97,7 +100,19 @@ int __xray_set_handler(void (*entry)(int32_t,
__sanitizer::memory_order_acquire)) {
__sanitizer::atomic_store(&__xray::XRayPatchedFunction,
- reinterpret_cast<uint64_t>(entry),
+ reinterpret_cast<uintptr_t>(entry),
+ __sanitizer::memory_order_release);
+ return 1;
+ }
+ return 0;
+}
+
+int __xray_set_customevent_handler(void (*entry)(void *, size_t))
+ XRAY_NEVER_INSTRUMENT {
+ if (__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire)) {
+ __sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent,
+ reinterpret_cast<uintptr_t>(entry),
__sanitizer::memory_order_release);
return 1;
}
@@ -161,6 +176,9 @@ inline bool patchSled(const XRaySledEntry &Sled, bool Enable,
case XRayEntryType::LOG_ARGS_ENTRY:
Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
break;
+ case XRayEntryType::CUSTOM_EVENT:
+ Success = patchCustomEvent(Enable, FuncId, Sled);
+ break;
default:
Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind));
return false;
@@ -301,6 +319,7 @@ int __xray_set_handler_arg1(void (*Handler)(int32_t, XRayEntryType, uint64_t)) {
__sanitizer::memory_order_release);
return 1;
}
+
int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index ef0c6b158..4a2784612 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -60,6 +60,7 @@ bool patchFunctionEntry(bool Enable, uint32_t FuncId,
bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
const XRaySledEntry &Sled);
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
} // namespace __xray
@@ -70,6 +71,7 @@ extern void __xray_FunctionEntry();
extern void __xray_FunctionExit();
extern void __xray_FunctionTailExit();
extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
}
#endif
diff --git a/lib/xray/xray_mips.cc b/lib/xray/xray_mips.cc
index c8ff39936..cd863304d 100644
--- a/lib/xray/xray_mips.cc
+++ b/lib/xray/xray_mips.cc
@@ -95,7 +95,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
// B #44
if (Enable) {
- uint32_t LoTracingHookAddr = reinterpret_cast<int32_t>(TracingHook) & 0xffff;
+ uint32_t LoTracingHookAddr =
+ reinterpret_cast<int32_t>(TracingHook) & 0xffff;
uint32_t HiTracingHookAddr =
(reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff;
uint32_t LoFunctionID = FuncId & 0xffff;
@@ -151,6 +152,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips?
+ return false;
+}
+
} // namespace __xray
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_mips64.cc b/lib/xray/xray_mips64.cc
index 21136848c..fa8fdd5ab 100644
--- a/lib/xray/xray_mips64.cc
+++ b/lib/xray/xray_mips64.cc
@@ -93,7 +93,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
if (Enable) {
uint32_t LoTracingHookAddr =
reinterpret_cast<int64_t>(TracingHook) & 0xffff;
- uint32_t HiTracingHookAddr = (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
+ uint32_t HiTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
uint32_t HigherTracingHookAddr =
(reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff;
uint32_t HighestTracingHookAddr =
@@ -160,6 +161,11 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips64?
+ return false;
+}
} // namespace __xray
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_powerpc64.cc b/lib/xray/xray_powerpc64.cc
index 6a7554cfc..ab03cb100 100644
--- a/lib/xray/xray_powerpc64.cc
+++ b/lib/xray/xray_powerpc64.cc
@@ -93,6 +93,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
// FIXME: Maybe implement this better?
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in powerpc64?
+ return false;
+}
+
} // namespace __xray
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S
index 847ecef8d..b59eedc4b 100644
--- a/lib/xray/xray_trampoline_x86_64.S
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -176,9 +176,15 @@ __xray_ArgLoggerEntry:
je .Larg1entryFail
.Larg1entryLog:
- movq %rdi, %rdx // first argument will become the third
- xorq %rsi, %rsi // XRayEntryType::ENTRY into the second
- movl %r10d, %edi // 32-bit function ID becomes the first
+
+ // First argument will become the third
+ movq %rdi, %rdx
+
+ // XRayEntryType::ENTRY into the second
+ xorq %rsi, %rsi
+
+ // 32-bit function ID becomes the first
+ movl %r10d, %edi
callq *%rax
.Larg1entryFail:
@@ -189,4 +195,38 @@ __xray_ArgLoggerEntry:
.size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
.cfi_endproc
+//===----------------------------------------------------------------------===//
+
+ .global __xray_CustomEvent
+ .align 16, 0x90
+ .type __xray_CustomEvent,@function
+__xray_CustomEvent:
+ .cfi_startproc
+ subq $16, %rsp
+ .cfi_def_cfa_offset 24
+ movq %rbp, 8(%rsp)
+ movq %rax, 0(%rsp)
+
+ // We take two arguments to this trampoline, which should be in rdi and rsi
+ // already. We also make sure that we stash %rax because we use that register
+ // to call the logging handler.
+ movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
+ testq %rax,%rax
+ je .LcustomEventCleanup
+
+ // At this point we know that rcx and rdx already has the data, so we just
+ // call the logging handler.
+ callq *%rax
+
+.LcustomEventCleanup:
+ movq 0(%rsp), %rax
+ movq 8(%rsp), %rbp
+ addq $16, %rsp
+ .cfi_def_cfa_offset 8
+ retq
+
+.Ltmp8:
+ .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
+ .cfi_endproc
+
NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
index 2e9a8d270..e34806fa1 100644
--- a/lib/xray/xray_x86_64.cc
+++ b/lib/xray/xray_x86_64.cc
@@ -75,8 +75,10 @@ uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint16_t Jmp20Seq = 0x14eb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
+static constexpr uint16_t NopwSeq = 0x9066;
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
@@ -201,6 +203,40 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
return true;
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +19 // 2 bytes
+ // ...
+ //
+ // With the following:
+ //
+ // nopw // 2 bytes*
+ // ...
+ //
+ // We need to do this in the following order:
+ //
+ // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
+ // the relative offset to the __xray_CustomEvent trampoline.
+ // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'.
+ // This allows us to "enable" this code once the changes have committed.
+ //
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'.
+ //
+ if (Enable) {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ }
+ return false;
+}
+
// We determine whether the CPU we're running on has the correct features we
// need. In x86_64 this will be rdtscp support.
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {