//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the X86-specific assembly for the trampolines.
//
//===----------------------------------------------------------------------===//

#include "../builtins/assembly.h"

.macro SAVE_REGISTERS
  subq $192, %rsp
  .cfi_def_cfa_offset 200
  // At this point, the stack pointer should be aligned to an 8-byte boundary,
  // because any call instructions that come after this will add another 8
  // bytes and therefore align it to 16 bytes.
  movq %rbp, 184(%rsp)
  movupd %xmm0, 168(%rsp)
  movupd %xmm1, 152(%rsp)
  movupd %xmm2, 136(%rsp)
  movupd %xmm3, 120(%rsp)
  movupd %xmm4, 104(%rsp)
  movupd %xmm5, 88(%rsp)
  movupd %xmm6, 72(%rsp)
  movupd %xmm7, 56(%rsp)
  movq %rdi, 48(%rsp)
  movq %rax, 40(%rsp)
  movq %rdx, 32(%rsp)
  movq %rsi, 24(%rsp)
  movq %rcx, 16(%rsp)
  movq %r8, 8(%rsp)
  movq %r9, 0(%rsp)
.endm

.macro RESTORE_REGISTERS
  movq 184(%rsp), %rbp
  movupd 168(%rsp), %xmm0
  movupd 152(%rsp), %xmm1
  movupd 136(%rsp), %xmm2
  movupd 120(%rsp), %xmm3
  movupd 104(%rsp), %xmm4
  movupd 88(%rsp), %xmm5
  movupd 72(%rsp), %xmm6
  movupd 56(%rsp), %xmm7
  movq 48(%rsp), %rdi
  movq 40(%rsp), %rax
  movq 32(%rsp), %rdx
  movq 24(%rsp), %rsi
  movq 16(%rsp), %rcx
  movq 8(%rsp), %r8
  movq 0(%rsp), %r9
  addq $192, %rsp
  .cfi_def_cfa_offset 8
.endm

.macro ALIGNED_CALL_RAX
  // Call the logging handler, after aligning the stack to a 16-byte boundary.
  // The approach we're taking here uses additional stack space to stash the
  // stack pointer twice before aligning it to 16 bytes. If the stack was only
  // 8-byte aligned, it becomes 16-byte aligned -- and whether or not the andq
  // actually moved the pointer, the slot at 8(%rsp) always holds one of the
  // two copies we stashed, which is what we restore from after the call. A
  // worked example follows the macro.
  pushq %rsp
  pushq (%rsp)
  andq $-0x10, %rsp
  callq *%rax
  movq 8(%rsp), %rsp
.endm
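// A worked example of the stashing trick above, using hypothetical incoming
// stack pointers (the concrete addresses are illustrative only):
//
//   %rsp on entry    after pushq %rsp    after pushq (%rsp)    after andq
//   0x1018           0x1010              0x1008                0x1000
//   0x1010           0x1008              0x1000                0x1000
//
// In both cases %rsp is 16-byte aligned at the callq, and the slot at
// 8(%rsp) -- address 0x1008 here -- holds a copy of the original stack
// pointer, which is exactly what the final `movq 8(%rsp), %rsp` reloads.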
  .text
  .file "xray_trampoline_x86.S"

//===----------------------------------------------------------------------===//

  .globl __xray_FunctionEntry
  .align 16, 0x90
  .type __xray_FunctionEntry,@function

__xray_FunctionEntry:
  .cfi_startproc
  SAVE_REGISTERS

  // This load has to be atomic; it's concurrent with __xray_patch().
  // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Ltmp0

  // The patched function prolog puts its xray_instr_map index into %r10d.
  movl %r10d, %edi
  xor %esi, %esi            // 0 == function entry.
  ALIGNED_CALL_RAX

.Ltmp0:
  RESTORE_REGISTERS
  retq

.Ltmp1:
  .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
  .cfi_endproc

//===----------------------------------------------------------------------===//

  .globl __xray_FunctionExit
  .align 16, 0x90
  .type __xray_FunctionExit,@function

__xray_FunctionExit:
  .cfi_startproc
  // Save the important registers first. Since we're assuming that this
  // function is only jumped into, we only preserve the registers needed for
  // returning.
  subq $56, %rsp
  .cfi_def_cfa_offset 64
  movq %rbp, 48(%rsp)
  movupd %xmm0, 32(%rsp)
  movupd %xmm1, 16(%rsp)
  movq %rax, 8(%rsp)
  movq %rdx, 0(%rsp)
  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Ltmp2

  movl %r10d, %edi
  movl $1, %esi             // 1 == function exit.
  ALIGNED_CALL_RAX

.Ltmp2:
  // Restore the important registers.
  movq 48(%rsp), %rbp
  movupd 32(%rsp), %xmm0
  movupd 16(%rsp), %xmm1
  movq 8(%rsp), %rax
  movq 0(%rsp), %rdx
  addq $56, %rsp
  .cfi_def_cfa_offset 8
  retq

.Ltmp3:
  .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
  .cfi_endproc

//===----------------------------------------------------------------------===//

  .globl __xray_FunctionTailExit
  .align 16, 0x90
  .type __xray_FunctionTailExit,@function

__xray_FunctionTailExit:
  .cfi_startproc
  // Unlike __xray_FunctionExit, this sled runs just before a tail call, when
  // the tail-called function's arguments are already live in registers, so we
  // preserve the full argument-register set.
  SAVE_REGISTERS

  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Ltmp4

  movl %r10d, %edi
  movl $2, %esi             // 2 == tail-call exit.
  ALIGNED_CALL_RAX

.Ltmp4:
  RESTORE_REGISTERS
  retq

.Ltmp5:
  .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
  .cfi_endproc

//===----------------------------------------------------------------------===//

  .globl __xray_ArgLoggerEntry
  .align 16, 0x90
  .type __xray_ArgLoggerEntry,@function

__xray_ArgLoggerEntry:
  .cfi_startproc
  SAVE_REGISTERS

  // Again, these function pointer loads must be atomic; MOV is fine.
  movq _ZN6__xray13XRayArgLoggerE(%rip), %rax
  testq %rax, %rax
  jne .Larg1entryLog

  // If the arg1 logging handler is not set, defer to the no-arg handler.
  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Larg1entryFail

.Larg1entryLog:
  // The first argument becomes the third.
  movq %rdi, %rdx
  // XRayEntryType::LOG_ARGS_ENTRY becomes the second.
  movl $0x3, %esi
  // The 32-bit function ID becomes the first.
  movl %r10d, %edi
  ALIGNED_CALL_RAX

.Larg1entryFail:
  RESTORE_REGISTERS
  retq

.Larg1entryEnd:
  .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
  .cfi_endproc

//===----------------------------------------------------------------------===//

  .globl __xray_CustomEvent
  .align 16, 0x90
  .type __xray_CustomEvent,@function

__xray_CustomEvent:
  .cfi_startproc
  SAVE_REGISTERS

  // This trampoline takes two arguments, which should already be in %rdi and
  // %rsi. We also stash %rax (via SAVE_REGISTERS) because we use that register
  // to call the logging handler.
  movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
  testq %rax, %rax
  je .LcustomEventCleanup

  ALIGNED_CALL_RAX

.LcustomEventCleanup:
  RESTORE_REGISTERS
  retq

.Ltmp8:
  .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
  .cfi_endproc

NO_EXEC_STACK_DIRECTIVE
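// For reference: a sketch of the handler signatures these trampolines expect,
// inferred from the register setup above and from the declarations in the
// XRay interface headers. The names below are illustrative; the headers are
// authoritative.
//
//   // Installed via __xray_set_handler(); called by __xray_FunctionEntry,
//   // __xray_FunctionExit and __xray_FunctionTailExit with the function id
//   // in %edi and the entry type in %esi.
//   void handler(int32_t FuncId, XRayEntryType Type);
//
//   // Installed via __xray_set_handler_arg1(); called by
//   // __xray_ArgLoggerEntry, which forwards the instrumented function's
//   // first argument in %rdx.
//   void arg1_handler(int32_t FuncId, XRayEntryType Type, uint64_t Arg1);
//
//   // Installed via __xray_set_customevent_handler(); called by
//   // __xray_CustomEvent with its two incoming arguments (event buffer
//   // pointer and size) left untouched in %rdi/%rsi.
//   void event_handler(void *Event, size_t Size);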