summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cmake/config-ix.cmake 2
-rwxr-xr-x lib/sanitizer_common/scripts/gen_dynamic_list.py 3
-rw-r--r-- lib/xray/CMakeLists.txt 8
-rw-r--r-- lib/xray/xray_arm.cc 131
-rw-r--r-- lib/xray/xray_inmemory_log.cc 52
-rw-r--r-- lib/xray/xray_interface.cc 141
-rw-r--r-- lib/xray/xray_interface_internal.h 22
-rw-r--r-- lib/xray/xray_trampoline_arm.S 65
-rw-r--r-- lib/xray/xray_x86_64.cc 116
9 files changed, 117 insertions, 423 deletions
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 41d14433d..a96d0572e 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
-set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
if(APPLE)
include(CompilerRTDarwinUtils)
diff --git a/lib/sanitizer_common/scripts/gen_dynamic_list.py b/lib/sanitizer_common/scripts/gen_dynamic_list.py
index 1d4230607..69f26f4e9 100755
--- a/lib/sanitizer_common/scripts/gen_dynamic_list.py
+++ b/lib/sanitizer_common/scripts/gen_dynamic_list.py
@@ -19,7 +19,6 @@ import os
import re
import subprocess
import sys
-import platform
new_delete = set([
'_Znam', '_ZnamRKSt9nothrow_t', # operator new[](unsigned long)
@@ -51,7 +50,7 @@ def get_global_functions(library):
raise subprocess.CalledProcessError(nm_proc.returncode, nm)
func_symbols = ['T', 'W']
# On PowerPC, nm prints function descriptors from .data section.
- if platform.uname()[4] in ["powerpc", "ppc64"]:
+ if os.uname()[4] in ["powerpc", "ppc64"]:
func_symbols += ['D']
for line in nm_out:
cols = line.split(' ')
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
index 12d9b7a53..bcd25297b 100644
--- a/lib/xray/CMakeLists.txt
+++ b/lib/xray/CMakeLists.txt
@@ -8,17 +8,9 @@ set(XRAY_SOURCES
)
set(x86_64_SOURCES
- xray_x86_64.cc
xray_trampoline_x86_64.S
${XRAY_SOURCES})
-set(arm_SOURCES
- xray_arm.cc
- xray_trampoline_arm.S
- ${XRAY_SOURCES})
-
-set(armhf_SOURCES ${arm_SOURCES})
-
include_directories(..)
include_directories(../../include)
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
deleted file mode 100644
index 60e7437c1..000000000
--- a/lib/xray/xray_arm.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of XRay, a dynamic runtime instrumentation system.
-//
-// Implementation of ARM-specific routines (32-bit).
-//
-//===----------------------------------------------------------------------===//
-#include "xray_interface_internal.h"
-#include "sanitizer_common/sanitizer_common.h"
-#include <atomic>
-#include <cassert>
-
-namespace __xray {
-
-// The machine codes for some instructions used in runtime patching.
-enum class PatchOpcodes : uint32_t
-{
- PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
- PO_BlxIp = 0xE12FFF3C, // BLX ip
- PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr}
- PO_B20 = 0xEA000005 // B #20
-};
-
-// 0xUUUUWXYZ -> 0x000W0XYZ
-inline static uint32_t getMovwMask(const uint32_t Value) {
- return (Value & 0xfff) | ((Value & 0xf000) << 4);
-}
-
-// 0xWXYZUUUU -> 0x000W0XYZ
-inline static uint32_t getMovtMask(const uint32_t Value) {
- return getMovwMask(Value >> 16);
-}
-
-// Writes the following instructions:
-// MOVW R<regNo>, #<lower 16 bits of the |Value|>
-// MOVT R<regNo>, #<higher 16 bits of the |Value|>
-inline static uint32_t* write32bitLoadReg(uint8_t regNo, uint32_t* Address,
- const uint32_t Value) {
- //This is a fatal error: we cannot just report it and continue execution.
- assert(regNo <= 15 && "Register number must be 0 to 15.");
- // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
- *Address = (0xE3000000 | (uint32_t(regNo)<<12) | getMovwMask(Value));
- Address++;
- // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
- *Address = (0xE3400000 | (uint32_t(regNo)<<12) | getMovtMask(Value));
- return Address + 1;
-}
-
-// Writes the following instructions:
-// MOVW r0, #<lower 16 bits of the |Value|>
-// MOVT r0, #<higher 16 bits of the |Value|>
-inline static uint32_t *Write32bitLoadR0(uint32_t *Address,
- const uint32_t Value) {
- return write32bitLoadReg(0, Address, Value);
-}
-
-// Writes the following instructions:
-// MOVW ip, #<lower 16 bits of the |Value|>
-// MOVT ip, #<higher 16 bits of the |Value|>
-inline static uint32_t *Write32bitLoadIP(uint32_t *Address,
- const uint32_t Value) {
- return write32bitLoadReg(12, Address, Value);
-}
-
-inline static bool patchSled(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled, void (*TracingHook)()) {
- // When |Enable| == true,
- // We replace the following compile-time stub (sled):
- //
- // xray_sled_n:
- // B #20
- // 6 NOPs (24 bytes)
- //
- // With the following runtime patch:
- //
- // xray_sled_n:
- // PUSH {r0, lr}
- // MOVW r0, #<lower 16 bits of function ID>
- // MOVT r0, #<higher 16 bits of function ID>
- // MOVW ip, #<lower 16 bits of address of TracingHook>
- // MOVT ip, #<higher 16 bits of address of TracingHook>
- // BLX ip
- // POP {r0, lr}
- //
- // Replacement of the first 4-byte instruction should be the last and atomic
- // operation, so that the user code which reaches the sled concurrently
- // either jumps over the whole sled, or executes the whole sled when the
- // latter is ready.
- //
- // When |Enable|==false, we set back the first instruction in the sled to be
- // B #20
-
- uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
- if (Enable) {
- uint32_t *CurAddress = FirstAddress + 1;
- CurAddress =
- Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
- CurAddress =
- Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
- *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
- CurAddress++;
- *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
- uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
- } else {
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
- uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
- }
- return true;
-}
-
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) {
- return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
-}
-
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) {
- return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
-}
-
-} // namespace __xray
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
index 286ba7122..f4fd20846 100644
--- a/lib/xray/xray_inmemory_log.cc
+++ b/lib/xray/xray_inmemory_log.cc
@@ -24,14 +24,7 @@
#include <sys/types.h>
#include <thread>
#include <unistd.h>
-
-#if defined(__x86_64__)
- #include <x86intrin.h>
-#elif defined(__arm__)
- static const int64_t NanosecondsPerSecond = 1000LL*1000*1000;
-#else
- #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
+#include <x86intrin.h>
#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
@@ -68,7 +61,6 @@ static void retryingWriteAll(int Fd, char *Begin, char *End) {
}
}
-#if defined(__x86_64__)
static std::pair<ssize_t, bool> retryingReadSome(int Fd, char *Begin,
char *End) {
auto BytesToRead = std::distance(Begin, End);
@@ -111,8 +103,6 @@ static bool readValueFromFile(const char *Filename, long long *Value) {
return Result;
}
-#endif /* CPU architecture */
-
class ThreadExitFlusher {
int Fd;
XRayRecord *Start;
@@ -174,7 +164,6 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {
// Get the cycle frequency from SysFS on Linux.
long long CPUFrequency = -1;
-#if defined(__x86_64__)
if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
&CPUFrequency)) {
CPUFrequency *= 1000;
@@ -185,20 +174,6 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {
} else {
Report("Unable to determine CPU frequency for TSC accounting.");
}
-#elif defined(__arm__)
- // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
- // not have a constant frequency like TSC on x86(_64), it may go faster
- // or slower depending on CPU turbo or power saving mode. Furthermore,
- // to read from CP15 on ARM a kernel modification or a driver is needed.
- // We can not require this from users of compiler-rt.
- // So on ARM we use clock_gettime() which gives the result in nanoseconds.
- // To get the measurements per second, we scale this by the number of
- // nanoseconds per second, pretending that the TSC frequency is 1GHz and
- // one TSC tick is 1 nanosecond.
- CPUFrequency = NanosecondsPerSecond;
-#else
- #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
// Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads
@@ -226,29 +201,10 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {
// First we get the useful data, and stuff it into the already aligned buffer
// through a pointer offset.
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+ unsigned CPU;
R.RecordType = RecordTypes::NORMAL;
-#if defined(__x86_64__)
- {
- unsigned CPU;
- R.TSC = __rdtscp(&CPU);
- R.CPU = CPU;
- }
-#elif defined(__arm__)
- {
- timespec TS;
- int result = clock_gettime(CLOCK_REALTIME, &TS);
- if(result != 0)
- {
- Report("clock_gettime() returned %d, errno=%d.", result, int(errno));
- TS.tv_sec = 0;
- TS.tv_nsec = 0;
- }
- R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
- R.CPU = 0;
- }
-#else
- #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
+ R.TSC = __rdtscp(&CPU);
+ R.CPU = CPU;
R.TId = TId;
R.Type = Type;
R.FuncId = FuncId;
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
index 360a6ad5b..5ef3fc7aa 100644
--- a/lib/xray/xray_interface.cc
+++ b/lib/xray/xray_interface.cc
@@ -26,15 +26,6 @@
namespace __xray {
-#if defined(__x86_64__)
- // FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect() ?
- static const int16_t cSledLength = 12;
-#elif defined(__arm__)
- static const int16_t cSledLength = 28;
-#else
- #error "Unsupported CPU Architecture"
-#endif /* CPU architecture */
-
// This is the function to call when we encounter the entry or exit sleds.
std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
@@ -73,6 +64,13 @@ public:
} // namespace __xray
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+}
+
extern std::atomic<bool> XRayInitialized;
extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
@@ -135,13 +133,12 @@ XRayPatchingStatus ControlPatching(bool Enable) {
if (InstrMap.Entries == 0)
return XRayPatchingStatus::NOT_INITIALIZED;
- const uint64_t PageSize = GetPageSizeCached();
- if((PageSize == 0) || ( (PageSize & (PageSize-1)) != 0) ) {
- Report("System page size is not a power of two: %lld", PageSize);
- return XRayPatchingStatus::FAILED;
- }
-
- uint32_t FuncId = 1;
+ int32_t FuncId = 1;
+ static constexpr uint8_t CallOpCode = 0xe8;
+ static constexpr uint16_t MovR10Seq = 0xba41;
+ static constexpr uint16_t Jmp9Seq = 0x09eb;
+ static constexpr uint8_t JmpOpCode = 0xe9;
+ static constexpr uint8_t RetOpCode = 0xc3;
uint64_t CurFun = 0;
for (std::size_t I = 0; I < InstrMap.Entries; I++) {
auto Sled = InstrMap.Sleds[I];
@@ -156,28 +153,112 @@ XRayPatchingStatus ControlPatching(bool Enable) {
// While we're here, we should patch the nop sled. To do that we mprotect
// the page containing the function to be writeable.
void *PageAlignedAddr =
- reinterpret_cast<void *>(Sled.Address & ~(PageSize-1));
+ reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1));
std::size_t MProtectLen =
- (Sled.Address + cSledLength) - reinterpret_cast<uint64_t>(PageAlignedAddr);
+ (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
MProtectHelper Protector(PageAlignedAddr, MProtectLen);
if (Protector.MakeWriteable() == -1) {
printf("Failed mprotect: %d\n", errno);
return XRayPatchingStatus::FAILED;
}
- bool Success = false;
- switch(Sled.Kind) {
- case XRayEntryType::ENTRY:
- Success = patchFunctionEntry(Enable, FuncId, Sled);
- break;
- case XRayEntryType::EXIT:
- Success = patchFunctionExit(Enable, FuncId, Sled);
- break;
- default:
- Report("Unsupported sled kind: %d", int(Sled.Kind));
- continue;
+ static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+ static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+ if (Sled.Kind == XRayEntryType::ENTRY) {
+ // FIXME: Implement this in a more extensible manner, per-platform.
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +9
+ // <9 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // call <relative 32bit offset to entry trampoline>
+ //
+ // We need to do this in the following order:
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 2-byte jmp instruction).
+ // 2. Put the call opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // Prerequisite is to compute the relative offset to the
+ // __xray_FunctionEntry function's address.
+ int64_t TrampolineOffset =
+ reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
+ "%ld\n",
+ __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
+ TrampolineOffset);
+ continue;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ }
+
+ if (Sled.Kind == XRayEntryType::EXIT) {
+ // FIXME: Implement this in a more extensible manner, per-platform.
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // ret
+ // <10 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // jmp <relative 32bit offset to exit trampoline>
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 1-byte ret instruction and the first byte of the nop).
+ // 2. Put the jmp opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the ret instruction and the first nop byte
+ // for the "mov r10d" opcode and first operand.
+ //
+ // Prerequisite is to compute the relative offset to the
+ // __xray_FunctionExit function's address.
+ int64_t TrampolineOffset =
+ reinterpret_cast<int64_t>(__xray_FunctionExit) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+ "%ld\n",
+ __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
+ TrampolineOffset);
+ continue;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
}
- (void)Success;
}
XRayPatching.store(false, std::memory_order_release);
PatchingSuccess = true;
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index 3465b67d5..6208c110e 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -16,30 +16,18 @@
#define XRAY_INTERFACE_INTERNAL_H
#include "xray/xray_interface.h"
-#include "sanitizer_common/sanitizer_platform.h"
#include <cstddef>
#include <cstdint>
extern "C" {
struct XRaySledEntry {
-#if SANITIZER_WORDSIZE == 64
uint64_t Address;
uint64_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
unsigned char Padding[14]; // Need 32 bytes
-#elif SANITIZER_WORDSIZE == 32
- uint32_t Address;
- uint32_t Function;
- unsigned char Kind;
- unsigned char AlwaysInstrument;
- unsigned char Padding[6]; // Need 16 bytes
-#else
- #error "Unsupported word size."
-#endif
};
-
}
namespace __xray {
@@ -49,16 +37,6 @@ struct XRaySledMap {
size_t Entries;
};
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
-
} // namespace __xray
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.S files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-}
-
#endif
diff --git a/lib/xray/xray_trampoline_arm.S b/lib/xray/xray_trampoline_arm.S
deleted file mode 100644
index 225bb50ce..000000000
--- a/lib/xray/xray_trampoline_arm.S
+++ /dev/null
@@ -1,65 +0,0 @@
- .syntax unified
- .arch armv7
- .fpu vfpv3
- .code 32
- .global _ZN6__xray19XRayPatchedFunctionE
- @ Word-aligned function entry point
- .p2align 2
- @ Let C/C++ see the symbol
- .global __xray_FunctionEntry
- @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
- @ Assume that "q" part of the floating-point registers is not used
- @ for passing parameters to C/C++ functions.
- .type __xray_FunctionEntry, %function
- @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
- @ FuncId passed in r0 register.
-__xray_FunctionEntry:
- PUSH {r1-r3,lr}
- @ Save floating-point parameters of the instrumented function
- VPUSH {d0-d7}
- MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
- MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
- LDR r2, [r1]
- @ Handler address is nullptr if handler is not set
- CMP r2, #0
- BEQ FunctionEntry_restore
- @ Function ID is already in r0 (the first parameter).
- @ r1=0 means that we are tracing an entry event
- MOV r1, #0
- @ Call the handler with 2 parameters in r0 and r1
- BLX r2
-FunctionEntry_restore:
- @ Restore floating-point parameters of the instrumented function
- VPOP {d0-d7}
- POP {r1-r3,pc}
-
- @ Word-aligned function entry point
- .p2align 2
- @ Let C/C++ see the symbol
- .global __xray_FunctionExit
- @ Assume that d1-d7 are not used for the return value.
- @ Assume that "q" part of the floating-point registers is not used for the
- @ return value in C/C++.
- .type __xray_FunctionExit, %function
- @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
- @ FuncId passed in r0 register.
-__xray_FunctionExit:
- PUSH {r1-r3,lr}
- @ Save the floating-point return value of the instrumented function
- VPUSH {d0}
- @ Load the handler address
- MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
- MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
- LDR r2, [r1]
- @ Handler address is nullptr if handler is not set
- CMP r2, #0
- BEQ FunctionExit_restore
- @ Function ID is already in r0 (the first parameter).
- @ 1 means that we are tracing an exit event
- MOV r1, #1
- @ Call the handler with 2 parameters in r0 and r1
- BLX r2
-FunctionExit_restore:
- @ Restore the floating-point return value of the instrumented function
- VPOP {d0}
- POP {r1-r3,pc}
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
deleted file mode 100644
index 398d6fc14..000000000
--- a/lib/xray/xray_x86_64.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "xray_interface_internal.h"
-#include "sanitizer_common/sanitizer_common.h"
-#include <atomic>
-#include <cstdint>
-#include <limits>
-
-namespace __xray {
-
-static constexpr uint8_t CallOpCode = 0xe8;
-static constexpr uint16_t MovR10Seq = 0xba41;
-static constexpr uint16_t Jmp9Seq = 0x09eb;
-static constexpr uint8_t JmpOpCode = 0xe9;
-static constexpr uint8_t RetOpCode = 0xc3;
-
-static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
-static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
-
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
-{
- // Here we do the dance of replacing the following sled:
- //
- // xray_sled_n:
- // jmp +9
- // <9 byte nop>
- //
- // With the following:
- //
- // mov r10d, <function id>
- // call <relative 32bit offset to entry trampoline>
- //
- // We need to do this in the following order:
- //
- // 1. Put the function id first, 2 bytes from the start of the sled (just
- // after the 2-byte jmp instruction).
- // 2. Put the call opcode 6 bytes from the start of the sled.
- // 3. Put the relative offset 7 bytes from the start of the sled.
- // 4. Do an atomic write over the jmp instruction for the "mov r10d"
- // opcode and first operand.
- //
- // Prerequisite is to compute the relative offset to the
- // __xray_FunctionEntry function's address.
- int64_t TrampolineOffset =
- reinterpret_cast<int64_t>(__xray_FunctionEntry) -
- (static_cast<int64_t>(Sled.Address) + 11);
- if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
- Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
- "%ld\n",
- __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
- TrampolineOffset);
- return false;
- }
- if (Enable) {
- *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
- *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
- *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
- std::memory_order_release);
- } else {
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
- std::memory_order_release);
- // FIXME: Write out the nops still?
- }
- return true;
-}
-
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
-{
- // Here we do the dance of replacing the following sled:
- //
- // xray_sled_n:
- // ret
- // <10 byte nop>
- //
- // With the following:
- //
- // mov r10d, <function id>
- // jmp <relative 32bit offset to exit trampoline>
- //
- // 1. Put the function id first, 2 bytes from the start of the sled (just
- // after the 1-byte ret instruction).
- // 2. Put the jmp opcode 6 bytes from the start of the sled.
- // 3. Put the relative offset 7 bytes from the start of the sled.
- // 4. Do an atomic write over the jmp instruction for the "mov r10d"
- // opcode and first operand.
- //
- // Prerequisite is to compute the relative offset fo the
- // __xray_FunctionExit function's address.
- int64_t TrampolineOffset =
- reinterpret_cast<int64_t>(__xray_FunctionExit) -
- (static_cast<int64_t>(Sled.Address) + 11);
- if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
- Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
- "%ld\n",
- __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
- TrampolineOffset);
- return false;
- }
- if (Enable) {
- *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
- *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
- *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
- std::memory_order_release);
- } else {
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
- std::memory_order_release);
- // FIXME: Write out the nops still?
- }
- return true;
-}
-
-} // namespace __xray