summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dean Michael Berris <dberris@google.com> 2017-08-23 04:42:37 +0000
committer Dean Michael Berris <dberris@google.com> 2017-08-23 04:42:37 +0000
commit 94fc5971be53f669387763bf9599d23da08b30be (patch)
tree d77ac43db135c1e5ea3b438fff09f91080e22937
parent 24e609bda53ee838eb1f2f6eef82fc04deb7db13 (diff)
[XRay][compiler-rt] Support sled versioning for custom event sleds
Summary: This change introduces versions to the instrumentation map entries we emit for XRay instrumentation points. The status quo for the version is currently set to 0 (as emitted by the LLVM back-end), and versions will count up to 255 (unsigned char). This change is in preparation for supporting the newer version of the custom event sleds that will be emitted by the LLVM compiler. While we're here, we take the opportunity to stash more registers and align the stack properly in the __xray_CustomEvent trampoline. Reviewers: kpw, pcc, dblaikie Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D36816 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@311524 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/xray/xray_interface_internal.h 6
-rw-r--r-- lib/xray/xray_trampoline_x86_64.S 21
-rw-r--r-- lib/xray/xray_x86_64.cc 36
-rw-r--r-- test/xray/TestCases/Linux/custom-event-handler-alignment.cc 42
-rw-r--r-- test/xray/TestCases/Linux/custom-event-logging.cc 2
5 files changed, 85 insertions, 22 deletions
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index 4a2784612..5811e2b73 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -28,13 +28,15 @@ struct XRaySledEntry {
uint64_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
- unsigned char Padding[14]; // Need 32 bytes
+ unsigned char Version;
+ unsigned char Padding[13]; // Need 32 bytes
#elif SANITIZER_WORDSIZE == 32
uint32_t Address;
uint32_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
- unsigned char Padding[6]; // Need 16 bytes
+ unsigned char Version;
+ unsigned char Padding[5]; // Need 16 bytes
#else
#error "Unsupported word size."
#endif
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S
index b59eedc4b..5c38c40e2 100644
--- a/lib/xray/xray_trampoline_x86_64.S
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -202,10 +202,7 @@ __xray_ArgLoggerEntry:
.type __xray_CustomEvent,@function
__xray_CustomEvent:
.cfi_startproc
- subq $16, %rsp
- .cfi_def_cfa_offset 24
- movq %rbp, 8(%rsp)
- movq %rax, 0(%rsp)
+ SAVE_REGISTERS
// We take two arguments to this trampoline, which should be in rdi and rsi
// already. We also make sure that we stash %rax because we use that register
@@ -215,14 +212,20 @@ __xray_CustomEvent:
je .LcustomEventCleanup
// At this point we know that rcx and rdx already has the data, so we just
- // call the logging handler.
+ // call the logging handler, after aligning the stack to a 16-byte boundary.
+ // The approach we're taking here uses additional stack space to stash the
+ // stack pointer twice before aligning the pointer to 16-bytes. If the stack
+ // was 8-byte aligned, it will become 16-byte aligned -- when restoring the
+ // pointer, we can always look -8 bytes from the current position to get
+ // either of the values we've stashed in the first place.
+ pushq %rsp
+ pushq (%rsp)
+ andq $-0x10, %rsp
callq *%rax
+ movq 8(%rsp), %rsp
.LcustomEventCleanup:
- movq 0(%rsp), %rax
- movq 8(%rsp), %rbp
- addq $16, %rsp
- .cfi_def_cfa_offset 8
+ RESTORE_REGISTERS
retq
.Ltmp8:
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
index e34806fa1..3c12682cc 100644
--- a/lib/xray/xray_x86_64.cc
+++ b/lib/xray/xray_x86_64.cc
@@ -76,6 +76,7 @@ static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;
@@ -207,8 +208,10 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
// Here we do the dance of replacing the following sled:
//
+ // In Version 0:
+ //
// xray_sled_n:
- // jmp +19 // 2 bytes
+ // jmp +20 // 2 bytes
// ...
//
// With the following:
@@ -216,24 +219,35 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
// nopw // 2 bytes*
// ...
//
- // We need to do this in the following order:
//
- // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
- // the relative offset to the __xray_CustomEvent trampoline.
- // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'.
- // This allows us to "enable" this code once the changes have committed.
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ //
+ // ---
//
- // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'.
+ // In Version 1:
+ //
+ // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+ // to a jmp, use 15 bytes instead.
//
if (Enable) {
std::atomic_store_explicit(
reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
std::memory_order_release);
} else {
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
- std::memory_order_release);
- }
+ switch (Sled.Version) {
+ case 1:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
+ std::memory_order_release);
+ break;
+ case 0:
+ default:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ break;
+ }
+ }
return false;
}
diff --git a/test/xray/TestCases/Linux/custom-event-handler-alignment.cc b/test/xray/TestCases/Linux/custom-event-handler-alignment.cc
new file mode 100644
index 000000000..447f6e4f2
--- /dev/null
+++ b/test/xray/TestCases/Linux/custom-event-handler-alignment.cc
@@ -0,0 +1,42 @@
+// Make sure we're aligning the stack properly when lowering the custom event
+// calls.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false" \
+// RUN: %run %t 2>&1
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+#include <xmmintrin.h>
+#include <stdio.h>
+#include "xray/xray_interface.h"
+
+[[clang::xray_never_instrument]] __attribute__((weak)) __m128 f(__m128 *i) {
+ return *i;
+}
+
+[[clang::xray_always_instrument]] void foo() {
+ __xray_customevent(0, 0);
+ __m128 v = {};
+ f(&v);
+}
+
+[[clang::xray_always_instrument]] void bar() {
+ __xray_customevent(0, 0);
+}
+
+void printer(void* ptr, size_t size) {
+ printf("handler called\n");
+ __m128 v = {};
+ f(&v);
+}
+
+int main(int argc, char* argv[]) {
+ __xray_set_customevent_handler(printer);
+ __xray_patch();
+ foo(); // CHECK: handler called
+ bar(); // CHECK: handler called
+ __xray_unpatch();
+ __xray_remove_customevent_handler();
+ foo();
+ bar();
+}
diff --git a/test/xray/TestCases/Linux/custom-event-logging.cc b/test/xray/TestCases/Linux/custom-event-logging.cc
index 9bb5d44e1..48fd62034 100644
--- a/test/xray/TestCases/Linux/custom-event-logging.cc
+++ b/test/xray/TestCases/Linux/custom-event-logging.cc
@@ -2,6 +2,8 @@
//
// RUN: %clangxx_xray -std=c++11 %s -o %t
// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_xray -std=c++11 -fpic -fpie %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
// FIXME: Support this in non-x86_64 as well
// REQUIRES: x86_64-linux
// REQUIRES: built-in-llvm-tree