//===-- xray_inmemory_log.cc ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is a part of XRay, a dynamic runtime instrumentation system. // // Implementation of a simple in-memory log of XRay events. This defines a // logging function that's compatible with the XRay handler interface, and // routines for exporting data to files. // //===----------------------------------------------------------------------===// #include #include #include #include #include #include #include #include #include #include #include "sanitizer_common/sanitizer_allocator_internal.h" #include "sanitizer_common/sanitizer_libc.h" #include "xray/xray_records.h" #include "xray_defs.h" #include "xray_flags.h" #include "xray_inmemory_log.h" #include "xray_interface_internal.h" #include "xray_tsc.h" #include "xray_utils.h" namespace __xray { __sanitizer::SpinMutex LogMutex; // We use elements of this type to record the entry TSC of every function ID we // see as we're tracing a particular thread's execution. struct alignas(16) StackEntry { int32_t FuncId; uint16_t Type; uint8_t CPU; uint8_t Padding; uint64_t TSC; }; static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); struct alignas(64) ThreadLocalData { void *InMemoryBuffer = nullptr; size_t BufferSize = 0; size_t BufferOffset = 0; void *ShadowStack = nullptr; size_t StackSize = 0; size_t StackEntries = 0; int Fd = -1; pid_t TID = 0; }; static pthread_key_t PThreadKey; static __sanitizer::atomic_uint8_t BasicInitialized{0}; BasicLoggingOptions GlobalOptions; thread_local volatile bool RecursionGuard = false; static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT { static uint64_t TicksPerSec = probeRequiredCPUFeatures() ? getTSCFrequency() : __xray::NanosecondsPerSecond; static const uint64_t ThresholdTicks = TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000; return ThresholdTicks; } static int openLogFile() XRAY_NEVER_INSTRUMENT { int F = getLogFD(); if (F == -1) return -1; // Test for required CPU features and cache the cycle frequency static bool TSCSupported = probeRequiredCPUFeatures(); static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads // logging do writes which just append. XRayFileHeader Header; Header.Version = 2; // Version 2 includes tail exit records. Header.Type = FileTypes::NAIVE_LOG; Header.CycleFrequency = CycleFrequency; // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. Header.ConstantTSC = 1; Header.NonstopTSC = 1; retryingWriteAll(F, reinterpret_cast(&Header), reinterpret_cast(&Header) + sizeof(Header)); return F; } int getGlobalFd() XRAY_NEVER_INSTRUMENT { static int Fd = openLogFile(); return Fd; } ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { thread_local ThreadLocalData TLD; thread_local bool UNUSED TOnce = [] { if (GlobalOptions.ThreadBufferSize == 0) { if (__sanitizer::Verbosity()) Report("Not initializing TLD since ThreadBufferSize == 0.\n"); return false; } TLD.TID = __sanitizer::GetTid(); pthread_setspecific(PThreadKey, &TLD); TLD.Fd = getGlobalFd(); TLD.InMemoryBuffer = reinterpret_cast( InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, nullptr, alignof(XRayRecord))); TLD.BufferSize = GlobalOptions.ThreadBufferSize; TLD.BufferOffset = 0; if (GlobalOptions.MaxStackDepth == 0) { if (__sanitizer::Verbosity()) Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); TLD.StackSize = 0; TLD.StackEntries = 0; TLD.ShadowStack = nullptr; return false; } TLD.ShadowStack = reinterpret_cast( InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr, alignof(StackEntry))); TLD.StackSize = GlobalOptions.MaxStackDepth; TLD.StackEntries = 0; if (__sanitizer::Verbosity() >= 2) { static auto UNUSED Once = [] { auto ticks = thresholdTicks(); Report("Ticks threshold: %d\n", ticks); return false; }(); } return false; }(); return TLD; } template void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); auto &InMemoryBuffer = TLD.InMemoryBuffer; int Fd = getGlobalFd(); if (Fd == -1) return; // Use a simple recursion guard, to handle cases where we're already logging // and for one reason or another, this function gets called again in the same // thread. if (RecursionGuard) return; RecursionGuard = true; auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; }); uint8_t CPU = 0; uint64_t TSC = ReadTSC(CPU); switch (Type) { case XRayEntryType::ENTRY: case XRayEntryType::LOG_ARGS_ENTRY: { // Short circuit if we've reached the maximum depth of the stack. if (TLD.StackEntries++ >= TLD.StackSize) return; // When we encounter an entry event, we keep track of the TSC and the CPU, // and put it in the stack. StackEntry E; E.FuncId = FuncId; E.CPU = CPU; E.Type = Type; E.TSC = TSC; auto StackEntryPtr = static_cast(TLD.ShadowStack) + (sizeof(StackEntry) * (TLD.StackEntries - 1)); __sanitizer::internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); break; } case XRayEntryType::EXIT: case XRayEntryType::TAIL: { if (TLD.StackEntries == 0) break; if (--TLD.StackEntries >= TLD.StackSize) return; // When we encounter an exit event, we check whether all the following are // true: // // - The Function ID is the same as the most recent entry in the stack. // - The CPU is the same as the most recent entry in the stack. // - The Delta of the TSCs is less than the threshold amount of time we're // looking to record. // // If all of these conditions are true, we pop the stack and don't write a // record and move the record offset back. StackEntry StackTop; auto StackEntryPtr = static_cast(TLD.ShadowStack) + (sizeof(StackEntry) * TLD.StackEntries); __sanitizer::internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && StackTop.TSC < TSC) { auto Delta = TSC - StackTop.TSC; if (Delta < thresholdTicks()) { assert(TLD.BufferOffset > 0); TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; return; } } break; } default: // Should be unreachable. assert(false && "Unsupported XRayEntryType encountered."); break; } // First determine whether the delta between the function's enter record and // the exit record is higher than the threshold. __xray::XRayRecord R; R.RecordType = RecordTypes::NORMAL; R.CPU = CPU; R.TSC = TSC; R.TId = TLD.TID; R.Type = Type; R.FuncId = FuncId; auto EntryPtr = static_cast(InMemoryBuffer) + (sizeof(__xray::XRayRecord) * TLD.BufferOffset); __sanitizer::internal_memcpy(EntryPtr, &R, sizeof(R)); if (++TLD.BufferOffset == TLD.BufferSize) { __sanitizer::SpinMutexLock L(&LogMutex); auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); retryingWriteAll(Fd, reinterpret_cast(RecordBuffer), reinterpret_cast(RecordBuffer + TLD.BufferOffset)); TLD.BufferOffset = 0; TLD.StackEntries = 0; } } template void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); auto &InMemoryBuffer = TLD.InMemoryBuffer; auto &Offset = TLD.BufferOffset; const auto &BuffLen = TLD.BufferSize; int Fd = getGlobalFd(); if (Fd == -1) return; // First we check whether there's enough space to write the data consecutively // in the thread-local buffer. If not, we first flush the buffer before // attempting to write the two records that must be consecutive. if (Offset + 2 > BuffLen) { __sanitizer::SpinMutexLock L(&LogMutex); auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); retryingWriteAll(Fd, reinterpret_cast(RecordBuffer), reinterpret_cast(RecordBuffer + Offset)); Offset = 0; TLD.StackEntries = 0; } // Then we write the "we have an argument" record. InMemoryRawLog(FuncId, Type, ReadTSC); if (RecursionGuard) return; RecursionGuard = true; auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; }); // And from here on write the arg payload. __xray::XRayArgPayload R; R.RecordType = RecordTypes::ARG_PAYLOAD; R.FuncId = FuncId; R.TId = TLD.TID; R.Arg = Arg1; auto EntryPtr = &reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset]; std::memcpy(EntryPtr, &R, sizeof(R)); if (++Offset == BuffLen) { __sanitizer::SpinMutexLock L(&LogMutex); auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); retryingWriteAll(Fd, reinterpret_cast(RecordBuffer), reinterpret_cast(RecordBuffer + Offset)); Offset = 0; TLD.StackEntries = 0; } } void basicLoggingHandleArg0RealTSC(int32_t FuncId, XRayEntryType Type) XRAY_NEVER_INSTRUMENT { InMemoryRawLog(FuncId, Type, __xray::readTSC); } void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) XRAY_NEVER_INSTRUMENT { InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { timespec TS; int result = clock_gettime(CLOCK_REALTIME, &TS); if (result != 0) { Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); TS = {0, 0}; } CPU = 0; return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; }); } void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT { InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC); } void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT { InMemoryRawLogWithArg( FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { timespec TS; int result = clock_gettime(CLOCK_REALTIME, &TS); if (result != 0) { Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); TS = {0, 0}; } CPU = 0; return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; }); } static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { ThreadLocalData &TLD = *reinterpret_cast(P); auto ExitGuard = __sanitizer::at_scope_exit([&TLD] { // Clean up dynamic resources. if (TLD.InMemoryBuffer) InternalFree(TLD.InMemoryBuffer); if (TLD.ShadowStack) InternalFree(TLD.ShadowStack); if (__sanitizer::Verbosity()) Report("Cleaned up log for TID: %d\n", TLD.TID); }); if (TLD.Fd == -1 || TLD.BufferOffset == 0) { if (__sanitizer::Verbosity()) Report("Skipping buffer for TID: %d; Fd = %d; Offset = %llu\n", TLD.TID, TLD.Fd, TLD.BufferOffset); return; } { __sanitizer::SpinMutexLock L(&LogMutex); retryingWriteAll(TLD.Fd, reinterpret_cast(TLD.InMemoryBuffer), reinterpret_cast(TLD.InMemoryBuffer) + (sizeof(__xray::XRayRecord) * TLD.BufferOffset)); } // Because this thread's exit could be the last one trying to write to // the file and that we're not able to close out the file properly, we // sync instead and hope that the pending writes are flushed as the // thread exits. fsync(TLD.Fd); } XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { static bool UNUSED Once = [] { pthread_key_create(&PThreadKey, TLDDestructor); return false; }(); uint8_t Expected = 0; if (!__sanitizer::atomic_compare_exchange_strong( &BasicInitialized, &Expected, 1, __sanitizer::memory_order_acq_rel)) { if (__sanitizer::Verbosity()) Report("Basic logging already initialized.\n"); return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } if (OptionsSize != sizeof(BasicLoggingOptions)) { Report("Invalid options size, potential ABI mismatch; expected %d got %d", sizeof(BasicLoggingOptions), OptionsSize); return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } static auto UseRealTSC = probeRequiredCPUFeatures(); if (!UseRealTSC && __sanitizer::Verbosity()) Report("WARNING: Required CPU features missing for XRay instrumentation, " "using emulation instead.\n"); GlobalOptions = *reinterpret_cast(Options); __xray_set_handler_arg1(UseRealTSC ? basicLoggingHandleArg1RealTSC : basicLoggingHandleArg1EmulateTSC); __xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC : basicLoggingHandleArg0EmulateTSC); __xray_remove_customevent_handler(); return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { uint8_t Expected = 0; if (!__sanitizer::atomic_compare_exchange_strong( &BasicInitialized, &Expected, 0, __sanitizer::memory_order_acq_rel) && __sanitizer::Verbosity()) Report("Basic logging already finalized.\n"); // Nothing really to do aside from marking state of the global to be // uninitialized. return XRayLogInitStatus::XRAY_LOG_FINALIZED; } XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { // This really does nothing, since flushing the logs happen at the end of a // thread's lifetime, or when the buffers are full. return XRayLogFlushStatus::XRAY_LOG_FLUSHED; } // This is a handler that, effectively, does nothing. void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { } bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { XRayLogImpl Impl{ basicLoggingInit, basicLoggingFinalize, basicLoggingHandleArg0Empty, basicLoggingFlush, }; auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && __sanitizer::Verbosity()) Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", RegistrationResult); if (flags()->xray_naive_log || !__sanitizer::internal_strcmp(flags()->xray_mode, "xray-basic")) { __xray_set_log_impl(Impl); BasicLoggingOptions Options; Options.DurationFilterMicros = flags()->xray_naive_log_func_duration_threshold_us; Options.MaxStackDepth = flags()->xray_naive_log_max_stack_depth; Options.ThreadBufferSize = flags()->xray_naive_log_thread_buffer_size; __xray_log_init(flags()->xray_naive_log_thread_buffer_size, 0, &Options, sizeof(BasicLoggingOptions)); static auto UNUSED Once = [] { static auto UNUSED &TLD = getThreadLocalData(); __sanitizer::Atexit(+[] { TLDDestructor(&TLD); }); return false; }(); } return true; } } // namespace __xray static auto UNUSED Unused = __xray::basicLogDynamicInitializer();