author     Dean Michael Berris <dberris@google.com>    2016-11-21 03:20:43 +0000
committer  Dean Michael Berris <dberris@google.com>    2016-11-21 03:20:43 +0000
commit     2a05c834ed840634ae1247f30179ba90d6255f34 (patch)
tree       863976c50fce3366589c9cdd6cc5ba4b9b5015a1
parent     8134888c4d01164a92fb590c8d2e8c277a55e233 (diff)
[XRay] Support AArch64 in compiler-rt
This patch adds XRay support in compiler-rt for AArch64 targets.

This patch is one of a series:
LLVM: https://reviews.llvm.org/D26412
Clang: https://reviews.llvm.org/D26415

Author: rSerge

Reviewers: rengolin, dberris

Subscribers: aemerson, mgorny, llvm-commits, iid_iunknown

Differential Revision: https://reviews.llvm.org/D26413

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@287517 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  cmake/config-ix.cmake               |   2
-rw-r--r--  include/xray/xray_interface.h       |   7
-rw-r--r--  lib/xray/CMakeLists.txt             |   5
-rw-r--r--  lib/xray/xray_AArch64.cc            | 105
-rw-r--r--  lib/xray/xray_inmemory_log.cc       |   6
-rw-r--r--  lib/xray/xray_interface.cc          |   2
-rw-r--r--  lib/xray/xray_trampoline_AArch64.S  |  89
7 files changed, 212 insertions(+), 4 deletions(-)
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 0795a63f0..7b18584f5 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
-set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
if(APPLE)
include(CompilerRTDarwinUtils)
diff --git a/include/xray/xray_interface.h b/include/xray/xray_interface.h
index 680fcfdd5..9e712b1fa 100644
--- a/include/xray/xray_interface.h
+++ b/include/xray/xray_interface.h
@@ -32,6 +32,13 @@ enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
// (function entry, function exit, etc.). See the enum
// XRayEntryType for more details.
//
+// The user-provided handler must correctly handle spurious calls that arrive
+// after it has been removed or replaced with another handler, because it
+// would be too costly for the XRay runtime to guarantee that none occur.
+// To prevent recursive invocation, the handler function itself and all of its
+// direct and indirect callees must not be instrumented with XRay, which can
+// be achieved by marking them with __attribute__((xray_never_instrument)).
+//
// Returns 1 on success, 0 on error.
extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
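
As a concrete illustration of the two requirements in the comment above, here is a minimal handler sketch (not part of this patch; the disarm flag and all names are invented for the example). It tolerates spurious calls that arrive after it has been logically disarmed, and it is excluded from instrumentation so it cannot recurse into the XRay trampolines.

#include "xray/xray_interface.h"
#include <atomic>
#include <cstdint>
#include <cstdio>

static std::atomic<bool> Armed{true};

// Neither this function nor anything it calls may be XRay-instrumented,
// otherwise an instrumented callee would re-enter the trampolines.
__attribute__((xray_never_instrument))
void MyHandler(int32_t FuncId, XRayEntryType Type) {
  if (!Armed.load(std::memory_order_acquire))
    return; // tolerate spurious calls after the handler is disarmed or replaced
  std::printf("func %d: %s\n", FuncId, Type == ENTRY ? "entry" : "exit/tail");
}

int main() {
  if (!__xray_set_handler(MyHandler)) // returns 1 on success, 0 on error
    return 1;
  // ... run XRay-instrumented code here ...
  Armed.store(false, std::memory_order_release);
  return 0;
}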
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
index c9f5105eb..bab84d835 100644
--- a/lib/xray/CMakeLists.txt
+++ b/lib/xray/CMakeLists.txt
@@ -19,6 +19,11 @@ set(arm_SOURCES
set(armhf_SOURCES ${arm_SOURCES})
+set(aarch64_SOURCES
+ xray_AArch64.cc
+ xray_trampoline_AArch64.S
+ ${XRAY_SOURCES})
+
include_directories(..)
include_directories(../../include)
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
new file mode 100644
index 000000000..c2d33a200
--- /dev/null
+++ b/lib/xray/xray_AArch64.cc
@@ -0,0 +1,105 @@
+//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of AArch64-specific routines (64-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+ PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
+ PO_LdrW0_12 = 0x18000060, // LDR W0, #12
+ PO_LdrX16_12 = 0x58000070, // LDR X16, #12
+ PO_BlrX16 = 0xD63F0200, // BLR X16
+ PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16
+ PO_B32 = 0x14000008 // B #32
+};
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B #32
+ // 7 NOPs (24 bytes)
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n:
+ // STP X0, X30, [SP, #-16]! ; PUSH {X0, LR}
+ // LDR W0, #12 ; W0 := function ID
+ // LDR X16, #12 ; X16 := address of the trampoline
+ // BLR X16
+ // ;DATA: 32 bits of function ID
+ // ;DATA: lower 32 bits of the address of the trampoline
+ // ;DATA: higher 32 bits of the address of the trampoline
+ // LDP X0, X30, [SP], #16 ; POP {X0, LR}
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #32
+
+ uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+ if (Enable) {
+ uint32_t *CurAddress = FirstAddress + 1;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16);
+ CurAddress++;
+ *CurAddress = FuncId;
+ CurAddress++;
+ *reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
+ CurAddress += 2;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+} // namespace __xray
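
The literal offsets baked into the PatchOpcodes constants are easy to misread, so here is a small standalone check (not part of the patch) that decodes them using the AArch64 encodings for B (26-bit signed word offset) and LDR literal (19-bit signed word offset plus a 5-bit Rt field), and notes the resulting byte layout of the 32-byte patched sled:

#include <cstdint>
#include <cstdio>

static uint32_t bByteOffset(uint32_t Insn)      { return (Insn & 0x03FFFFFFu) * 4; }
static uint32_t ldrLitByteOffset(uint32_t Insn) { return ((Insn >> 5) & 0x7FFFFu) * 4; }
static uint32_t ldrLitRt(uint32_t Insn)         { return Insn & 0x1Fu; }

int main() {
  // Constants copied from PatchOpcodes above.
  const uint32_t B32 = 0x14000008, LdrW0 = 0x18000060, LdrX16 = 0x58000070;
  std::printf("B skips %u bytes (the whole sled)\n", bByteOffset(B32));                      // 32
  std::printf("LDR W%u reads %u bytes ahead\n", ldrLitRt(LdrW0), ldrLitByteOffset(LdrW0));   // W0, 12
  std::printf("LDR X%u reads %u bytes ahead\n", ldrLitRt(LdrX16), ldrLitByteOffset(LdrX16)); // X16, 12
  // Patched sled layout (byte offsets): 0 STP, 4 LDR W0 (-> 16), 8 LDR X16 (-> 20),
  // 12 BLR X16, 16 FuncId, 20..27 trampoline address, 28 LDP. The trampolines'
  // `ADD X30, X30, #12` turns the BLR return address (offset 16) into offset 28.
  return 0;
}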
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
index 17275ccf9..d00e39316 100644
--- a/lib/xray/xray_inmemory_log.cc
+++ b/lib/xray/xray_inmemory_log.cc
@@ -27,7 +27,7 @@
#if defined(__x86_64__)
#include <x86intrin.h>
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000;
#else
#error "Unsupported CPU Architecture"
@@ -195,7 +195,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
} else {
Report("Unable to determine CPU frequency for TSC accounting.");
}
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
// not have a constant frequency like TSC on x86(_64), it may go faster
// or slower depending on CPU turbo or power saving mode. Furthermore,
@@ -243,7 +243,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
R.TSC = __rdtscp(&CPU);
R.CPU = CPU;
}
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
{
timespec TS;
int result = clock_gettime(CLOCK_REALTIME, &TS);
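
The ARM/AArch64 branch of this hunk ends at the clock_gettime call, so here is a minimal sketch (an assumption about the surrounding code, not shown in the diff) of how such a timespec can be folded into the single 64-bit timestamp field that the x86_64 path fills from __rdtscp:

#include <cstdint>
#include <cstdio>
#include <ctime>

static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000;

// Hypothetical helper: collapse a wall-clock reading into one 64-bit value so
// ARM/AArch64 records and x86_64 TSC records share the same field.
static uint64_t readTimestampFallback() {
  timespec TS;
  if (clock_gettime(CLOCK_REALTIME, &TS) != 0)
    return 0; // on failure, record zero rather than uninitialized data
  return static_cast<uint64_t>(TS.tv_sec) * NanosecondsPerSecond + TS.tv_nsec;
}

int main() {
  std::printf("%llu\n", static_cast<unsigned long long>(readTimestampFallback()));
  return 0;
}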
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
index bfee1b8bc..60a5c77fe 100644
--- a/lib/xray/xray_interface.cc
+++ b/lib/xray/xray_interface.cc
@@ -33,6 +33,8 @@ namespace __xray {
static const int16_t cSledLength = 12;
#elif defined(__arm__)
static const int16_t cSledLength = 28;
+#elif defined(__aarch64__)
+static const int16_t cSledLength = 32;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
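
A quick consistency check on the new constant (not part of the patch, just restating the arithmetic): the AArch64 sled described in xray_AArch64.cc is one B #32 followed by seven NOPs, i.e. eight fixed-width 4-byte instructions, which is exactly the 32 bytes declared here.

#include <cstdint>
static constexpr int16_t cSledLengthAArch64 = 32; // value from the hunk above
static constexpr int kAArch64InsnBytes = 4;       // every AArch64 instruction is 4 bytes
static_assert(cSledLengthAArch64 == 8 * kAArch64InsnBytes,
              "AArch64 sled = B #32 plus 7 NOPs = 8 instructions");
int main() { return 0; }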
diff --git a/lib/xray/xray_trampoline_AArch64.S b/lib/xray/xray_trampoline_AArch64.S
new file mode 100644
index 000000000..f1a471c04
--- /dev/null
+++ b/lib/xray/xray_trampoline_AArch64.S
@@ -0,0 +1,89 @@
+ .text
+ /* The variable containing the handler function pointer */
+ .global _ZN6__xray19XRayPatchedFunctionE
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionEntry
+ .type __xray_FunctionEntry, %function
+ /* In C++ this is extern "C" void __xray_FunctionEntry(uint32_t FuncId),
+ with FuncId passed in the W0 register. */
+__xray_FunctionEntry:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ STP Q0, Q1, [SP, #-32]!
+ STP Q2, Q3, [SP, #-32]!
+ STP Q4, Q5, [SP, #-32]!
+ STP Q6, Q7, [SP, #-32]!
+ /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ B.EQ FunctionEntry_restore
+ /* Function ID is already in W0 (the first parameter).
+ X1=0 means that we are tracing an entry event */
+ MOV X1, #0
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionEntry_restore:
+ /* Pop the saved registers */
+ LDP Q6, Q7, [SP], #32
+ LDP Q4, Q5, [SP], #32
+ LDP Q2, Q3, [SP], #32
+ LDP Q0, Q1, [SP], #32
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
+
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionExit
+ .type __xray_FunctionExit, %function
+ /* In C++ this is extern "C" void __xray_FunctionExit(uint32_t FuncId),
+ with FuncId passed in the W0 register. */
+__xray_FunctionExit:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ STR Q0, [SP, #-16]!
+ /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ B.EQ FunctionExit_restore
+ /* Function ID is already in W0 (the first parameter).
+ X1=1 means that we are tracing an exit event */
+ MOV X1, #1
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionExit_restore:
+ LDR Q0, [SP], #16
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET