-rw-r--r--  lib/xray/xray_interface_internal.h | 6
-rw-r--r--  lib/xray/xray_trampoline_x86_64.S | 21
-rw-r--r--  lib/xray/xray_x86_64.cc | 36
-rw-r--r--  test/xray/TestCases/Linux/custom-event-handler-alignment.cc | 42
-rw-r--r--  test/xray/TestCases/Linux/custom-event-logging.cc | 2
5 files changed, 85 insertions(+), 22 deletions(-)
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index 4a2784612..5811e2b73 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -28,13 +28,15 @@ struct XRaySledEntry {
uint64_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
- unsigned char Padding[14]; // Need 32 bytes
+ unsigned char Version;
+ unsigned char Padding[13]; // Need 32 bytes
#elif SANITIZER_WORDSIZE == 32
uint32_t Address;
uint32_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
- unsigned char Padding[6]; // Need 16 bytes
+ unsigned char Version;
+ unsigned char Padding[5]; // Need 16 bytes
#else
#error "Unsupported word size."
#endif
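
The header change above turns one of the padding bytes into a per-sled Version field while keeping the entry size unchanged (32 bytes on 64-bit targets, 16 bytes on 32-bit). As a quick layout sanity check, here is a minimal stand-alone C++ sketch -- not part of the patch, with an illustrative struct and namespace name -- that mirrors the 64-bit entry and asserts its size:

#include <cstdint>

namespace sled_layout_sketch {
// Mirrors the 64-bit XRaySledEntry layout after the patch.
struct XRaySledEntry64 {
  uint64_t Address;
  uint64_t Function;
  unsigned char Kind;
  unsigned char AlwaysInstrument;
  unsigned char Version;     // carved out of the former padding
  unsigned char Padding[13]; // keeps the entry at 32 bytes
};
static_assert(sizeof(XRaySledEntry64) == 32,
              "64-bit sled entries must stay 32 bytes");
} // namespace sled_layout_sketch
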
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S
index b59eedc4b..5c38c40e2 100644
--- a/lib/xray/xray_trampoline_x86_64.S
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -202,10 +202,7 @@ __xray_ArgLoggerEntry:
.type __xray_CustomEvent,@function
__xray_CustomEvent:
.cfi_startproc
- subq $16, %rsp
- .cfi_def_cfa_offset 24
- movq %rbp, 8(%rsp)
- movq %rax, 0(%rsp)
+ SAVE_REGISTERS
// We take two arguments to this trampoline, which should be in rdi and rsi
// already. We also make sure that we stash %rax because we use that register
@@ -215,14 +212,20 @@ __xray_CustomEvent:
je .LcustomEventCleanup
// At this point we know that rcx and rdx already has the data, so we just
- // call the logging handler.
+ // call the logging handler, after aligning the stack to a 16-byte boundary.
+ // The approach we're taking here uses additional stack space to stash the
+ // stack pointer twice before aligning the pointer to 16-bytes. If the stack
+ // was 8-byte aligned, it will become 16-byte aligned -- when restoring the
+ // pointer, we can always look -8 bytes from the current position to get
+ // either of the values we've stashed in the first place.
+ pushq %rsp
+ pushq (%rsp)
+ andq $-0x10, %rsp
callq *%rax
+ movq 8(%rsp), %rsp
.LcustomEventCleanup:
- movq 0(%rsp), %rax
- movq 8(%rsp), %rbp
- addq $16, %rsp
- .cfi_def_cfa_offset 8
+ RESTORE_REGISTERS
retq
.Ltmp8:
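
To make the pushq/pushq/andq sequence above concrete: the trampoline stashes the incoming stack pointer twice, aligns %rsp down to a 16-byte boundary, and after the handler returns reloads the original value from 8 bytes above the aligned pointer, which works whether the incoming pointer was 8- or 16-byte aligned. The stand-alone C++ sketch below simulates that arithmetic on a fake stack; it is an illustration of the alignment trick, not the trampoline itself:

#include <cassert>
#include <cstdint>

int main() {
  alignas(16) uint64_t Stack[8];
  for (uint64_t Misalign : {0ull, 8ull}) {
    uint64_t OldSP = reinterpret_cast<uint64_t>(&Stack[6]) - Misalign;
    uint64_t SP = OldSP;
    SP -= 8;                                         // pushq %rsp
    *reinterpret_cast<uint64_t *>(SP) = OldSP;
    SP -= 8;                                         // pushq (%rsp)
    *reinterpret_cast<uint64_t *>(SP) = *reinterpret_cast<uint64_t *>(SP + 8);
    SP &= ~uint64_t{0xF};                            // andq $-0x10, %rsp
    // ... the handler would run here on a 16-byte aligned stack ...
    SP = *reinterpret_cast<uint64_t *>(SP + 8);      // movq 8(%rsp), %rsp
    assert(SP == OldSP);
  }
  return 0;
}
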
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
index e34806fa1..3c12682cc 100644
--- a/lib/xray/xray_x86_64.cc
+++ b/lib/xray/xray_x86_64.cc
@@ -76,6 +76,7 @@ static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;
@@ -207,8 +208,10 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
// Here we do the dance of replacing the following sled:
//
+ // In Version 0:
+ //
// xray_sled_n:
- // jmp +19 // 2 bytes
+ // jmp +20 // 2 bytes
// ...
//
// With the following:
@@ -216,24 +219,35 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
// nopw // 2 bytes*
// ...
//
- // We need to do this in the following order:
//
- // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
- // the relative offset to the __xray_CustomEvent trampoline.
- // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'.
- // This allows us to "enable" this code once the changes have committed.
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ //
+ // ---
//
- // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'.
+ // In Version 1:
+ //
+ // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+ // to a jmp, use 15 bytes instead.
//
if (Enable) {
std::atomic_store_explicit(
reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
std::memory_order_release);
} else {
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
- std::memory_order_release);
- }
+ switch (Sled.Version) {
+ case 1:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
+ std::memory_order_release);
+ break;
+ case 0:
+ default:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ break;
+ }
+ }
return false;
}
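
For readers decoding the constants: the 16-bit sequences are stored little-endian over the first two sled bytes, so Jmp15Seq (0x0feb) becomes the bytes 'eb 0f' -- a short jmp with an 8-bit displacement of 15 -- while Jmp20Seq (0x14eb) becomes 'eb 14', a jmp over 20 bytes, matching the Version 1 and Version 0 sleds respectively. A tiny stand-alone sketch (illustrative only, not part of the runtime) showing that byte order:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint16_t Jmp15Seq = 0x0feb;
  const uint16_t Jmp20Seq = 0x14eb;
  uint8_t Bytes[2];
  std::memcpy(Bytes, &Jmp15Seq, sizeof(Bytes));               // x86-64 stores little-endian
  std::printf("Jmp15Seq -> %02x %02x\n", Bytes[0], Bytes[1]); // prints: eb 0f
  std::memcpy(Bytes, &Jmp20Seq, sizeof(Bytes));
  std::printf("Jmp20Seq -> %02x %02x\n", Bytes[0], Bytes[1]); // prints: eb 14
  return 0;
}
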
diff --git a/test/xray/TestCases/Linux/custom-event-handler-alignment.cc b/test/xray/TestCases/Linux/custom-event-handler-alignment.cc
new file mode 100644
index 000000000..447f6e4f2
--- /dev/null
+++ b/test/xray/TestCases/Linux/custom-event-handler-alignment.cc
@@ -0,0 +1,42 @@
+// Make sure we're aligning the stack properly when lowering the custom event
+// calls.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false" \
+// RUN: %run %t 2>&1
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+#include <xmmintrin.h>
+#include <stdio.h>
+#include "xray/xray_interface.h"
+
+[[clang::xray_never_instrument]] __attribute__((weak)) __m128 f(__m128 *i) {
+ return *i;
+}
+
+[[clang::xray_always_instrument]] void foo() {
+ __xray_customevent(0, 0);
+ __m128 v = {};
+ f(&v);
+}
+
+[[clang::xray_always_instrument]] void bar() {
+ __xray_customevent(0, 0);
+}
+
+void printer(void* ptr, size_t size) {
+ printf("handler called\n");
+ __m128 v = {};
+ f(&v);
+}
+
+int main(int argc, char* argv[]) {
+ __xray_set_customevent_handler(printer);
+ __xray_patch();
+ foo(); // CHECK: handler called
+ bar(); // CHECK: handler called
+ __xray_unpatch();
+ __xray_remove_customevent_handler();
+ foo();
+ bar();
+}
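
The new test leans on __m128 being 16-byte aligned: routing a __m128 through f() from both the instrumented functions and the handler makes the compiler emit aligned SSE moves, which fault on a misaligned address, so a trampoline that called the handler on a misaligned stack would crash here. A trivial stand-alone check of that alignment assumption, illustrative only:

#include <xmmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  __m128 v = _mm_setzero_ps();
  std::printf("alignof(__m128) = %zu, &v %% 16 = %u\n", alignof(__m128),
              unsigned(reinterpret_cast<uintptr_t>(&v) % 16)); // expect 0
  return 0;
}
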
diff --git a/test/xray/TestCases/Linux/custom-event-logging.cc b/test/xray/TestCases/Linux/custom-event-logging.cc
index 9bb5d44e1..48fd62034 100644
--- a/test/xray/TestCases/Linux/custom-event-logging.cc
+++ b/test/xray/TestCases/Linux/custom-event-logging.cc
@@ -2,6 +2,8 @@
//
// RUN: %clangxx_xray -std=c++11 %s -o %t
// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_xray -std=c++11 -fpic -fpie %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
// FIXME: Support this in non-x86_64 as well
// REQUIRES: x86_64-linux
// REQUIRES: built-in-llvm-tree