diff options
-rw-r--r-- | cmake/config-ix.cmake | 2 | ||||
-rwxr-xr-x | lib/sanitizer_common/scripts/gen_dynamic_list.py | 3 | ||||
-rw-r--r-- | lib/xray/CMakeLists.txt | 8 | ||||
-rw-r--r-- | lib/xray/xray_arm.cc | 131 | ||||
-rw-r--r-- | lib/xray/xray_inmemory_log.cc | 52 | ||||
-rw-r--r-- | lib/xray/xray_interface.cc | 141 | ||||
-rw-r--r-- | lib/xray/xray_interface_internal.h | 22 | ||||
-rw-r--r-- | lib/xray/xray_trampoline_arm.S | 65 | ||||
-rw-r--r-- | lib/xray/xray_x86_64.cc | 116 |
9 files changed, 117 insertions, 423 deletions
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 41d14433d..a96d0572e 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64}) set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64}) set(ALL_ESAN_SUPPORTED_ARCH ${X86_64}) set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64}) -set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32}) +set(ALL_XRAY_SUPPORTED_ARCH ${X86_64}) if(APPLE) include(CompilerRTDarwinUtils) diff --git a/lib/sanitizer_common/scripts/gen_dynamic_list.py b/lib/sanitizer_common/scripts/gen_dynamic_list.py index 1d4230607..69f26f4e9 100755 --- a/lib/sanitizer_common/scripts/gen_dynamic_list.py +++ b/lib/sanitizer_common/scripts/gen_dynamic_list.py @@ -19,7 +19,6 @@ import os import re import subprocess import sys -import platform new_delete = set([ '_Znam', '_ZnamRKSt9nothrow_t', # operator new[](unsigned long) @@ -51,7 +50,7 @@ def get_global_functions(library): raise subprocess.CalledProcessError(nm_proc.returncode, nm) func_symbols = ['T', 'W'] # On PowerPC, nm prints function descriptors from .data section. - if platform.uname()[4] in ["powerpc", "ppc64"]: + if os.uname()[4] in ["powerpc", "ppc64"]: func_symbols += ['D'] for line in nm_out: cols = line.split(' ') diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt index 12d9b7a53..bcd25297b 100644 --- a/lib/xray/CMakeLists.txt +++ b/lib/xray/CMakeLists.txt @@ -8,17 +8,9 @@ set(XRAY_SOURCES ) set(x86_64_SOURCES - xray_x86_64.cc xray_trampoline_x86_64.S ${XRAY_SOURCES}) -set(arm_SOURCES - xray_arm.cc - xray_trampoline_arm.S - ${XRAY_SOURCES}) - -set(armhf_SOURCES ${arm_SOURCES}) - include_directories(..) include_directories(../../include) diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc deleted file mode 100644 index 60e7437c1..000000000 --- a/lib/xray/xray_arm.cc +++ /dev/null @@ -1,131 +0,0 @@ -//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of ARM-specific routines (32-bit). -// -//===----------------------------------------------------------------------===// -#include "xray_interface_internal.h" -#include "sanitizer_common/sanitizer_common.h" -#include <atomic> -#include <cassert> - -namespace __xray { - -// The machine codes for some instructions used in runtime patching. -enum class PatchOpcodes : uint32_t -{ - PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} - PO_BlxIp = 0xE12FFF3C, // BLX ip - PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} - PO_B20 = 0xEA000005 // B #20 -}; - -// 0xUUUUWXYZ -> 0x000W0XYZ -inline static uint32_t getMovwMask(const uint32_t Value) { - return (Value & 0xfff) | ((Value & 0xf000) << 4); -} - -// 0xWXYZUUUU -> 0x000W0XYZ -inline static uint32_t getMovtMask(const uint32_t Value) { - return getMovwMask(Value >> 16); -} - -// Writes the following instructions: -// MOVW R<regNo>, #<lower 16 bits of the |Value|> -// MOVT R<regNo>, #<higher 16 bits of the |Value|> -inline static uint32_t* write32bitLoadReg(uint8_t regNo, uint32_t* Address, - const uint32_t Value) { - //This is a fatal error: we cannot just report it and continue execution. - assert(regNo <= 15 && "Register number must be 0 to 15."); - // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ - *Address = (0xE3000000 | (uint32_t(regNo)<<12) | getMovwMask(Value)); - Address++; - // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ - *Address = (0xE3400000 | (uint32_t(regNo)<<12) | getMovtMask(Value)); - return Address + 1; -} - -// Writes the following instructions: -// MOVW r0, #<lower 16 bits of the |Value|> -// MOVT r0, #<higher 16 bits of the |Value|> -inline static uint32_t *Write32bitLoadR0(uint32_t *Address, - const uint32_t Value) { - return write32bitLoadReg(0, Address, Value); -} - -// Writes the following instructions: -// MOVW ip, #<lower 16 bits of the |Value|> -// MOVT ip, #<higher 16 bits of the |Value|> -inline static uint32_t *Write32bitLoadIP(uint32_t *Address, - const uint32_t Value) { - return write32bitLoadReg(12, Address, Value); -} - -inline static bool patchSled(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, void (*TracingHook)()) { - // When |Enable| == true, - // We replace the following compile-time stub (sled): - // - // xray_sled_n: - // B #20 - // 6 NOPs (24 bytes) - // - // With the following runtime patch: - // - // xray_sled_n: - // PUSH {r0, lr} - // MOVW r0, #<lower 16 bits of function ID> - // MOVT r0, #<higher 16 bits of function ID> - // MOVW ip, #<lower 16 bits of address of TracingHook> - // MOVT ip, #<higher 16 bits of address of TracingHook> - // BLX ip - // POP {r0, lr} - // - // Replacement of the first 4-byte instruction should be the last and atomic - // operation, so that the user code which reaches the sled concurrently - // either jumps over the whole sled, or executes the whole sled when the - // latter is ready. - // - // When |Enable|==false, we set back the first instruction in the sled to be - // B #20 - - uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address); - if (Enable) { - uint32_t *CurAddress = FirstAddress + 1; - CurAddress = - Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId)); - CurAddress = - Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook)); - *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); - CurAddress++; - *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); - std::atomic_store_explicit( - reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), - uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), - uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); - } - return true; -} - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) { - return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry); -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) { - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -} // namespace __xray diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc index 286ba7122..f4fd20846 100644 --- a/lib/xray/xray_inmemory_log.cc +++ b/lib/xray/xray_inmemory_log.cc @@ -24,14 +24,7 @@ #include <sys/types.h> #include <thread> #include <unistd.h> - -#if defined(__x86_64__) - #include <x86intrin.h> -#elif defined(__arm__) - static const int64_t NanosecondsPerSecond = 1000LL*1000*1000; -#else - #error "Unsupported CPU Architecture" -#endif /* CPU architecture */ +#include <x86intrin.h> #include "sanitizer_common/sanitizer_libc.h" #include "xray/xray_records.h" @@ -68,7 +61,6 @@ static void retryingWriteAll(int Fd, char *Begin, char *End) { } } -#if defined(__x86_64__) static std::pair<ssize_t, bool> retryingReadSome(int Fd, char *Begin, char *End) { auto BytesToRead = std::distance(Begin, End); @@ -111,8 +103,6 @@ static bool readValueFromFile(const char *Filename, long long *Value) { return Result; } -#endif /* CPU architecture */ - class ThreadExitFlusher { int Fd; XRayRecord *Start; @@ -174,7 +164,6 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) { // Get the cycle frequency from SysFS on Linux. long long CPUFrequency = -1; -#if defined(__x86_64__) if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &CPUFrequency)) { CPUFrequency *= 1000; @@ -185,20 +174,6 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) { } else { Report("Unable to determine CPU frequency for TSC accounting."); } -#elif defined(__arm__) - // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does - // not have a constant frequency like TSC on x86(_64), it may go faster - // or slower depending on CPU turbo or power saving mode. Furthermore, - // to read from CP15 on ARM a kernel modification or a driver is needed. - // We can not require this from users of compiler-rt. - // So on ARM we use clock_gettime() which gives the result in nanoseconds. - // To get the measurements per second, we scale this by the number of - // nanoseconds per second, pretending that the TSC frequency is 1GHz and - // one TSC tick is 1 nanosecond. - CPUFrequency = NanosecondsPerSecond; -#else - #error "Unsupported CPU Architecture" -#endif /* CPU architecture */ // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads @@ -226,29 +201,10 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) { // First we get the useful data, and stuff it into the already aligned buffer // through a pointer offset. auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; + unsigned CPU; R.RecordType = RecordTypes::NORMAL; -#if defined(__x86_64__) - { - unsigned CPU; - R.TSC = __rdtscp(&CPU); - R.CPU = CPU; - } -#elif defined(__arm__) - { - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if(result != 0) - { - Report("clock_gettime() returned %d, errno=%d.", result, int(errno)); - TS.tv_sec = 0; - TS.tv_nsec = 0; - } - R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; - R.CPU = 0; - } -#else - #error "Unsupported CPU Architecture" -#endif /* CPU architecture */ + R.TSC = __rdtscp(&CPU); + R.CPU = CPU; R.TId = TId; R.Type = Type; R.FuncId = FuncId; diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc index 360a6ad5b..5ef3fc7aa 100644 --- a/lib/xray/xray_interface.cc +++ b/lib/xray/xray_interface.cc @@ -26,15 +26,6 @@ namespace __xray { -#if defined(__x86_64__) - // FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect() ? - static const int16_t cSledLength = 12; -#elif defined(__arm__) - static const int16_t cSledLength = 28; -#else - #error "Unsupported CPU Architecture" -#endif /* CPU architecture */ - // This is the function to call when we encounter the entry or exit sleds. std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr}; @@ -73,6 +64,13 @@ public: } // namespace __xray +extern "C" { +// The following functions have to be defined in assembler, on a per-platform +// basis. See xray_trampoline_*.s files for implementations. +extern void __xray_FunctionEntry(); +extern void __xray_FunctionExit(); +} + extern std::atomic<bool> XRayInitialized; extern std::atomic<__xray::XRaySledMap> XRayInstrMap; @@ -135,13 +133,12 @@ XRayPatchingStatus ControlPatching(bool Enable) { if (InstrMap.Entries == 0) return XRayPatchingStatus::NOT_INITIALIZED; - const uint64_t PageSize = GetPageSizeCached(); - if((PageSize == 0) || ( (PageSize & (PageSize-1)) != 0) ) { - Report("System page size is not a power of two: %lld", PageSize); - return XRayPatchingStatus::FAILED; - } - - uint32_t FuncId = 1; + int32_t FuncId = 1; + static constexpr uint8_t CallOpCode = 0xe8; + static constexpr uint16_t MovR10Seq = 0xba41; + static constexpr uint16_t Jmp9Seq = 0x09eb; + static constexpr uint8_t JmpOpCode = 0xe9; + static constexpr uint8_t RetOpCode = 0xc3; uint64_t CurFun = 0; for (std::size_t I = 0; I < InstrMap.Entries; I++) { auto Sled = InstrMap.Sleds[I]; @@ -156,28 +153,112 @@ XRayPatchingStatus ControlPatching(bool Enable) { // While we're here, we should patch the nop sled. To do that we mprotect // the page containing the function to be writeable. void *PageAlignedAddr = - reinterpret_cast<void *>(Sled.Address & ~(PageSize-1)); + reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1)); std::size_t MProtectLen = - (Sled.Address + cSledLength) - reinterpret_cast<uint64_t>(PageAlignedAddr); + (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr); MProtectHelper Protector(PageAlignedAddr, MProtectLen); if (Protector.MakeWriteable() == -1) { printf("Failed mprotect: %d\n", errno); return XRayPatchingStatus::FAILED; } - bool Success = false; - switch(Sled.Kind) { - case XRayEntryType::ENTRY: - Success = patchFunctionEntry(Enable, FuncId, Sled); - break; - case XRayEntryType::EXIT: - Success = patchFunctionExit(Enable, FuncId, Sled); - break; - default: - Report("Unsupported sled kind: %d", int(Sled.Kind)); - continue; + static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; + static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; + if (Sled.Kind == XRayEntryType::ENTRY) { + // FIXME: Implement this in a more extensible manner, per-platform. + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +9 + // <9 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // call <relative 32bit offset to entry trampoline> + // + // We need to do this in the following order: + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 2-byte jmp instruction). + // 2. Put the call opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset to the + // __xray_FunctionEntry function's address. + int64_t TrampolineOffset = + reinterpret_cast<int64_t>(__xray_FunctionEntry) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Entry trampoline (%p) too far from sled (%p); distance = " + "%ld\n", + __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address), + TrampolineOffset); + continue; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + } + + if (Sled.Kind == XRayEntryType::EXIT) { + // FIXME: Implement this in a more extensible manner, per-platform. + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // ret + // <10 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // jmp <relative 32bit offset to exit trampoline> + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 1-byte ret instruction). + // 2. Put the jmp opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset fo the + // __xray_FunctionExit function's address. + int64_t TrampolineOffset = + reinterpret_cast<int64_t>(__xray_FunctionExit) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Exit trampoline (%p) too far from sled (%p); distance = " + "%ld\n", + __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address), + TrampolineOffset); + continue; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode, + std::memory_order_release); + // FIXME: Write out the nops still? + } } - (void)Success; } XRayPatching.store(false, std::memory_order_release); PatchingSuccess = true; diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h index 3465b67d5..6208c110e 100644 --- a/lib/xray/xray_interface_internal.h +++ b/lib/xray/xray_interface_internal.h @@ -16,30 +16,18 @@ #define XRAY_INTERFACE_INTERNAL_H #include "xray/xray_interface.h" -#include "sanitizer_common/sanitizer_platform.h" #include <cstddef> #include <cstdint> extern "C" { struct XRaySledEntry { -#if SANITIZER_WORDSIZE == 64 uint64_t Address; uint64_t Function; unsigned char Kind; unsigned char AlwaysInstrument; unsigned char Padding[14]; // Need 32 bytes -#elif SANITIZER_WORDSIZE == 32 - uint32_t Address; - uint32_t Function; - unsigned char Kind; - unsigned char AlwaysInstrument; - unsigned char Padding[6]; // Need 16 bytes -#else - #error "Unsupported word size." -#endif }; - } namespace __xray { @@ -49,16 +37,6 @@ struct XRaySledMap { size_t Entries; }; -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled); -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled); - } // namespace __xray -extern "C" { -// The following functions have to be defined in assembler, on a per-platform -// basis. See xray_trampoline_*.S files for implementations. -extern void __xray_FunctionEntry(); -extern void __xray_FunctionExit(); -} - #endif diff --git a/lib/xray/xray_trampoline_arm.S b/lib/xray/xray_trampoline_arm.S deleted file mode 100644 index 225bb50ce..000000000 --- a/lib/xray/xray_trampoline_arm.S +++ /dev/null @@ -1,65 +0,0 @@ - .syntax unified - .arch armv7 - .fpu vfpv3 - .code 32 - .global _ZN6__xray19XRayPatchedFunctionE - @ Word-aligned function entry point - .p2align 2 - @ Let C/C++ see the symbol - .global __xray_FunctionEntry - @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc) - @ Assume that "q" part of the floating-point registers is not used - @ for passing parameters to C/C++ functions. - .type __xray_FunctionEntry, %function - @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with - @ FuncId passed in r0 register. -__xray_FunctionEntry: - PUSH {r1-r3,lr} - @ Save floating-point parameters of the instrumented function - VPUSH {d0-d7} - MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE - MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE - LDR r2, [r1] - @ Handler address is nullptr if handler is not set - CMP r2, #0 - BEQ FunctionEntry_restore - @ Function ID is already in r0 (the first parameter). - @ r1=0 means that we are tracing an entry event - MOV r1, #0 - @ Call the handler with 2 parameters in r0 and r1 - BLX r2 -FunctionEntry_restore: - @ Restore floating-point parameters of the instrumented function - VPOP {d0-d7} - POP {r1-r3,pc} - - @ Word-aligned function entry point - .p2align 2 - @ Let C/C++ see the symbol - .global __xray_FunctionExit - @ Assume that d1-d7 are not used for the return value. - @ Assume that "q" part of the floating-point registers is not used for the - @ return value in C/C++. - .type __xray_FunctionExit, %function - @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with - @ FuncId passed in r0 register. -__xray_FunctionExit: - PUSH {r1-r3,lr} - @ Save the floating-point return value of the instrumented function - VPUSH {d0} - @ Load the handler address - MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE - MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE - LDR r2, [r1] - @ Handler address is nullptr if handler is not set - CMP r2, #0 - BEQ FunctionExit_restore - @ Function ID is already in r0 (the first parameter). - @ 1 means that we are tracing an exit event - MOV r1, #1 - @ Call the handler with 2 parameters in r0 and r1 - BLX r2 -FunctionExit_restore: - @ Restore the floating-point return value of the instrumented function - VPOP {d0} - POP {r1-r3,pc} diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc deleted file mode 100644 index 398d6fc14..000000000 --- a/lib/xray/xray_x86_64.cc +++ /dev/null @@ -1,116 +0,0 @@ -#include "xray_interface_internal.h" -#include "sanitizer_common/sanitizer_common.h" -#include <atomic> -#include <cstdint> -#include <limits> - -namespace __xray { - -static constexpr uint8_t CallOpCode = 0xe8; -static constexpr uint16_t MovR10Seq = 0xba41; -static constexpr uint16_t Jmp9Seq = 0x09eb; -static constexpr uint8_t JmpOpCode = 0xe9; -static constexpr uint8_t RetOpCode = 0xc3; - -static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; -static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled) -{ - // Here we do the dance of replacing the following sled: - // - // xray_sled_n: - // jmp +9 - // <9 byte nop> - // - // With the following: - // - // mov r10d, <function id> - // call <relative 32bit offset to entry trampoline> - // - // We need to do this in the following order: - // - // 1. Put the function id first, 2 bytes from the start of the sled (just - // after the 2-byte jmp instruction). - // 2. Put the call opcode 6 bytes from the start of the sled. - // 3. Put the relative offset 7 bytes from the start of the sled. - // 4. Do an atomic write over the jmp instruction for the "mov r10d" - // opcode and first operand. - // - // Prerequisite is to compute the relative offset to the - // __xray_FunctionEntry function's address. - int64_t TrampolineOffset = - reinterpret_cast<int64_t>(__xray_FunctionEntry) - - (static_cast<int64_t>(Sled.Address) + 11); - if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Entry trampoline (%p) too far from sled (%p); distance = " - "%ld\n", - __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address), - TrampolineOffset); - return false; - } - if (Enable) { - *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; - *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; - *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; - std::atomic_store_explicit( - reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, - std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, - std::memory_order_release); - // FIXME: Write out the nops still? - } - return true; -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled) -{ - // Here we do the dance of replacing the following sled: - // - // xray_sled_n: - // ret - // <10 byte nop> - // - // With the following: - // - // mov r10d, <function id> - // jmp <relative 32bit offset to exit trampoline> - // - // 1. Put the function id first, 2 bytes from the start of the sled (just - // after the 1-byte ret instruction). - // 2. Put the jmp opcode 6 bytes from the start of the sled. - // 3. Put the relative offset 7 bytes from the start of the sled. - // 4. Do an atomic write over the jmp instruction for the "mov r10d" - // opcode and first operand. - // - // Prerequisite is to compute the relative offset fo the - // __xray_FunctionExit function's address. - int64_t TrampolineOffset = - reinterpret_cast<int64_t>(__xray_FunctionExit) - - (static_cast<int64_t>(Sled.Address) + 11); - if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Exit trampoline (%p) too far from sled (%p); distance = " - "%ld\n", - __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address), - TrampolineOffset); - return false; - } - if (Enable) { - *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; - *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode; - *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; - std::atomic_store_explicit( - reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, - std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode, - std::memory_order_release); - // FIXME: Write out the nops still? - } - return true; -} - -} // namespace __xray |