[compiler-rt] r289789 - [XRay][compiler-rt][NFC] Deduplicate code in x86-64 trampolines.
Dean Michael Berris via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 15 01:04:06 PST 2016
Author: dberris
Date: Thu Dec 15 03:04:05 2016
New Revision: 289789
URL: http://llvm.org/viewvc/llvm-project?rev=289789&view=rev
Log:
[XRay][compiler-rt][NFC] Deduplicate code in x86-64 trampolines.
Summary:
The layout of all registers saved on stack shouldn't deviate and will be reused in future trampolines as well.
While there, fix whitespace and clarify comments.
Author: mpel
Reviewers: dberris
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D27799
Modified:
compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S
Modified: compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S?rev=289789&r1=289788&r2=289789&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S (original)
+++ compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S Thu Dec 15 03:04:05 2016
@@ -13,17 +13,7 @@
//
//===----------------------------------------------------------------------===//
- .text
- .file "xray_trampoline_x86.S"
- .globl __xray_FunctionEntry
- .align 16, 0x90
- .type __xray_FunctionEntry, at function
-
-__xray_FunctionEntry:
- .cfi_startproc
- // Save caller provided registers before doing any actual work.
- pushq %rbp
- .cfi_def_cfa_offset 16
+.macro SAVE_REGISTERS
subq $200, %rsp
movupd %xmm0, 184(%rsp)
movupd %xmm1, 168(%rsp)
@@ -34,25 +24,15 @@ __xray_FunctionEntry:
movupd %xmm6, 88(%rsp)
movupd %xmm7, 72(%rsp)
movq %rdi, 64(%rsp)
- movq %rax, 56(%rsp)
- movq %rdx, 48(%rsp)
+ movq %rax, 56(%rsp)
+ movq %rdx, 48(%rsp)
movq %rsi, 40(%rsp)
movq %rcx, 32(%rsp)
movq %r8, 24(%rsp)
movq %r9, 16(%rsp)
+.endm SAVE_REGISTERS
- // de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire
- // load (on x86 is a normal mov instruction).
- movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
- testq %rax, %rax
- je .Ltmp0
-
- // assume that %r10d has the function id.
- movl %r10d, %edi
- xor %esi,%esi
- callq *%rax
-.Ltmp0:
- // restore the registers
+.macro RESTORE_REGISTERS
movupd 184(%rsp), %xmm0
movupd 168(%rsp), %xmm1
movupd 152(%rsp), %xmm2
@@ -62,13 +42,39 @@ __xray_FunctionEntry:
movupd 88(%rsp) , %xmm6
movupd 72(%rsp) , %xmm7
movq 64(%rsp), %rdi
- movq 56(%rsp), %rax
- movq 48(%rsp), %rdx
+ movq 56(%rsp), %rax
+ movq 48(%rsp), %rdx
movq 40(%rsp), %rsi
movq 32(%rsp), %rcx
movq 24(%rsp), %r8
movq 16(%rsp), %r9
addq $200, %rsp
+.endm RESTORE_REGISTERS
+
+ .text
+ .file "xray_trampoline_x86.S"
+ .globl __xray_FunctionEntry
+ .align 16, 0x90
+ .type __xray_FunctionEntry, at function
+
+__xray_FunctionEntry:
+ .cfi_startproc
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ SAVE_REGISTERS
+
+ // This load has to be atomic, it's concurrent with __xray_patch().
+ // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
+ movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ testq %rax, %rax
+ je .Ltmp0
+
+ // The patched function prolog puts its xray_instr_map index into %r10d.
+ movl %r10d, %edi
+ xor %esi,%esi
+ callq *%rax
+.Ltmp0:
+ RESTORE_REGISTERS
popq %rbp
retq
.Ltmp1:
@@ -99,7 +105,7 @@ __xray_FunctionExit:
movl $1, %esi
callq *%rax
.Ltmp2:
- // Restore the important registers.
+ // Restore the important registers.
movupd 40(%rsp), %xmm0
movupd 24(%rsp), %xmm1
movq 16(%rsp), %rax
@@ -122,22 +128,7 @@ __xray_FunctionTailExit:
// this and increment the version number for the header.
pushq %rbp
.cfi_def_cfa_offset 16
- subq $200, %rsp
- movupd %xmm0, 184(%rsp)
- movupd %xmm1, 168(%rsp)
- movupd %xmm2, 152(%rsp)
- movupd %xmm3, 136(%rsp)
- movupd %xmm4, 120(%rsp)
- movupd %xmm5, 104(%rsp)
- movupd %xmm6, 88(%rsp)
- movupd %xmm7, 72(%rsp)
- movq %rdi, 64(%rsp)
- movq %rax, 56(%rsp)
- movq %rdx, 48(%rsp)
- movq %rsi, 40(%rsp)
- movq %rcx, 32(%rsp)
- movq %r8, 24(%rsp)
- movq %r9, 16(%rsp)
+ SAVE_REGISTERS
movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
testq %rax,%rax
@@ -148,25 +139,9 @@ __xray_FunctionTailExit:
callq *%rax
.Ltmp4:
- // Restore the registers.
- movupd 184(%rsp), %xmm0
- movupd 168(%rsp), %xmm1
- movupd 152(%rsp), %xmm2
- movupd 136(%rsp), %xmm3
- movupd 120(%rsp), %xmm4
- movupd 104(%rsp), %xmm5
- movupd 88(%rsp) , %xmm6
- movupd 72(%rsp) , %xmm7
- movq 64(%rsp), %rdi
- movq 56(%rsp), %rax
- movq 48(%rsp), %rdx
- movq 40(%rsp), %rsi
- movq 32(%rsp), %rcx
- movq 24(%rsp), %r8
- movq 16(%rsp), %r9
- addq $200, %rsp
+ RESTORE_REGISTERS
popq %rbp
retq
.Ltmp5:
- .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
+ .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
.cfi_endproc
More information about the llvm-commits
mailing list