//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the X86-specific assembler for the trampolines.
//
//===----------------------------------------------------------------------===//

// Spills the argument, return-value, and vector registers that the patched
// function may still have live when control reaches a trampoline. The area is
// 208 bytes (a multiple of 16) so that, together with the pushed %rbp, the
// stack remains 16-byte aligned at the indirect call sites below, as the
// SysV AMD64 ABI requires.
.macro SAVE_REGISTERS
  subq $208, %rsp
  movupd %xmm0, 184(%rsp)
  movupd %xmm1, 168(%rsp)
  movupd %xmm2, 152(%rsp)
  movupd %xmm3, 136(%rsp)
  movupd %xmm4, 120(%rsp)
  movupd %xmm5, 104(%rsp)
  movupd %xmm6, 88(%rsp)
  movupd %xmm7, 72(%rsp)
  movq %rdi, 64(%rsp)
  movq %rax, 56(%rsp)
  movq %rdx, 48(%rsp)
  movq %rsi, 40(%rsp)
  movq %rcx, 32(%rsp)
  movq %r8, 24(%rsp)
  movq %r9, 16(%rsp)
.endm

.macro RESTORE_REGISTERS
  movupd 184(%rsp), %xmm0
  movupd 168(%rsp), %xmm1
  movupd 152(%rsp), %xmm2
  movupd 136(%rsp), %xmm3
  movupd 120(%rsp), %xmm4
  movupd 104(%rsp), %xmm5
  movupd 88(%rsp), %xmm6
  movupd 72(%rsp), %xmm7
  movq 64(%rsp), %rdi
  movq 56(%rsp), %rax
  movq 48(%rsp), %rdx
  movq 40(%rsp), %rsi
  movq 32(%rsp), %rcx
  movq 24(%rsp), %r8
  movq 16(%rsp), %r9
  addq $208, %rsp
.endm

  .text
  .file "xray_trampoline_x86.S"
  .globl __xray_FunctionEntry
  .align 16, 0x90
  .type __xray_FunctionEntry,@function

__xray_FunctionEntry:
  .cfi_startproc
  pushq %rbp
  .cfi_def_cfa_offset 16
  SAVE_REGISTERS

  // This load has to be atomic; it is concurrent with __xray_patch().
  // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Ltmp0

  // The patched function prologue puts its xray_instr_map index into %r10d.
  movl %r10d, %edi
  xorl %esi, %esi
  callq *%rax

.Ltmp0:
  RESTORE_REGISTERS
  popq %rbp
  retq
.Ltmp1:
  .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
  .cfi_endproc

  .globl __xray_FunctionExit
  .align 16, 0x90
  .type __xray_FunctionExit,@function

__xray_FunctionExit:
  .cfi_startproc
  // Save the important registers first. Since we assume this function is only
  // ever jumped into, we only need to preserve the registers used for
  // returning values. The 64-byte frame (a multiple of 16) keeps the stack
  // 16-byte aligned at the indirect call below.
  pushq %rbp
  .cfi_def_cfa_offset 16
  subq $64, %rsp
  .cfi_def_cfa_offset 80
  movupd %xmm0, 40(%rsp)
  movupd %xmm1, 24(%rsp)
  movq %rax, 16(%rsp)
  movq %rdx, 8(%rsp)
  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Ltmp2

  movl %r10d, %edi
  movl $1, %esi
  callq *%rax

.Ltmp2:
  // Restore the important registers.
  movupd 40(%rsp), %xmm0
  movupd 24(%rsp), %xmm1
  movq 16(%rsp), %rax
  movq 8(%rsp), %rdx
  addq $64, %rsp
  popq %rbp
  retq
.Ltmp3:
  .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
  .cfi_endproc

  .globl __xray_FunctionTailExit
  .align 16, 0x90
  .type __xray_FunctionTailExit,@function

__xray_FunctionTailExit:
  .cfi_startproc
  // Save the important registers as in the entry trampoline, but record the
  // event as an exit. In the future we will introduce a new entry type that
  // differentiates between a normal exit and a tail exit; doing so will also
  // require incrementing the version number in the log header.
  pushq %rbp
  .cfi_def_cfa_offset 16
  SAVE_REGISTERS

  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
  testq %rax, %rax
  je .Ltmp4

  movl %r10d, %edi
  movl $1, %esi
  callq *%rax

.Ltmp4:
  RESTORE_REGISTERS
  popq %rbp
  retq
.Ltmp5:
  .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
  .cfi_endproc
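
// How control reaches these trampolines (a sketch, not authoritative; the
// actual sled layout is defined by the compiler's XRay instrumentation and by
// __xray_patch() in the runtime): the compiler plants nop sleds at function
// entry and at each return, and patching rewrites an entry sled to something
// like
//
//     movl $<function id>, %r10d   // index into the xray_instr_map section
//     callq __xray_FunctionEntry
//
// and a return sled to a matching movl followed by a jmp to
// __xray_FunctionExit, which is why that trampoline can assume it is only
// ever jumped into, with the instrumented function's return address still on
// top of the stack.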
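//
// The handler stored in _ZN6__xray19XRayPatchedFunctionE (installed through
// __xray_set_handler()) receives the function id and an entry-type value; a
// C declaration roughly along these lines is assumed:
//
//     void handler(int32_t FuncId, XRayEntryType Type);
//
// which is why each trampoline moves %r10d into %edi (the first integer
// argument) and sets %esi (the second) to 0 for a function entry or 1 for an
// exit before making the indirect call.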