[compiler-rt] r289789 - [XRay][compiler-rt][NFC] Deduplicate code in x86-64 trampolines.

Dean Michael Berris via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 15 01:04:06 PST 2016


Author: dberris
Date: Thu Dec 15 03:04:05 2016
New Revision: 289789

URL: http://llvm.org/viewvc/llvm-project?rev=289789&view=rev
Log:
[XRay][compiler-rt][NFC] Deduplicate code in x86-64 trampolines.

Summary:
The layout of the registers saved on the stack shouldn't deviate between trampolines, and it will be reused in future trampolines as well.

While there, fix whitespace and clarify comments.

Author: mpel

Reviewers: dberris

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D27799

Modified:
    compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S

Modified: compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S?rev=289789&r1=289788&r2=289789&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S (original)
+++ compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S Thu Dec 15 03:04:05 2016
@@ -13,17 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-	.text
-	.file "xray_trampoline_x86.S"
-	.globl __xray_FunctionEntry
-	.align 16, 0x90
-	.type __xray_FunctionEntry, @function
-
-__xray_FunctionEntry:
-  .cfi_startproc
-  // Save caller provided registers before doing any actual work.
-	pushq %rbp
-	.cfi_def_cfa_offset 16
+.macro SAVE_REGISTERS
 	subq $200, %rsp
 	movupd	%xmm0, 184(%rsp)
 	movupd	%xmm1, 168(%rsp)
@@ -34,25 +24,15 @@ __xray_FunctionEntry:
 	movupd	%xmm6, 88(%rsp)
 	movupd	%xmm7, 72(%rsp)
 	movq	%rdi, 64(%rsp)
-	movq  %rax, 56(%rsp)
-	movq  %rdx, 48(%rsp)
+	movq	%rax, 56(%rsp)
+	movq	%rdx, 48(%rsp)
 	movq	%rsi, 40(%rsp)
 	movq	%rcx, 32(%rsp)
 	movq	%r8, 24(%rsp)
 	movq	%r9, 16(%rsp)
+.endm SAVE_REGISTERS
 
-	// de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire
-	// load (on x86 is a normal mov instruction).
-	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
-	testq	%rax, %rax
-	je	.Ltmp0
-
-	// assume that %r10d has the function id.
-	movl	%r10d, %edi
-	xor	%esi,%esi
-	callq	*%rax
-.Ltmp0:
-  // restore the registers
+.macro RESTORE_REGISTERS
 	movupd	184(%rsp), %xmm0
 	movupd	168(%rsp), %xmm1
 	movupd	152(%rsp), %xmm2
@@ -62,13 +42,39 @@ __xray_FunctionEntry:
 	movupd	88(%rsp) , %xmm6
 	movupd	72(%rsp) , %xmm7
 	movq	64(%rsp), %rdi
-	movq  56(%rsp), %rax
-	movq  48(%rsp), %rdx
+	movq	56(%rsp), %rax
+	movq	48(%rsp), %rdx
 	movq	40(%rsp), %rsi
 	movq	32(%rsp), %rcx
 	movq	24(%rsp), %r8
 	movq	16(%rsp), %r9
 	addq	$200, %rsp
+.endm RESTORE_REGISTERS
+
+	.text
+	.file "xray_trampoline_x86.S"
+	.globl __xray_FunctionEntry
+	.align 16, 0x90
+	.type __xray_FunctionEntry, @function
+
+__xray_FunctionEntry:
+	.cfi_startproc
+	pushq %rbp
+	.cfi_def_cfa_offset 16
+	SAVE_REGISTERS
+
+	// This load has to be atomic, it's concurrent with __xray_patch().
+	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
+	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+	testq	%rax, %rax
+	je	.Ltmp0
+
+	// The patched function prolog puts its xray_instr_map index into %r10d.
+	movl	%r10d, %edi
+	xor	%esi,%esi
+	callq	*%rax
+.Ltmp0:
+	RESTORE_REGISTERS
 	popq	%rbp
 	retq
 .Ltmp1:
@@ -99,7 +105,7 @@ __xray_FunctionExit:
 	movl	$1, %esi
 	callq	*%rax
 .Ltmp2:
-  // Restore the important registers.
+	// Restore the important registers.
 	movupd	40(%rsp), %xmm0
 	movupd	24(%rsp), %xmm1
 	movq	16(%rsp), %rax
@@ -122,22 +128,7 @@ __xray_FunctionTailExit:
 	// this and increment the version number for the header.
 	pushq %rbp
 	.cfi_def_cfa_offset 16
-	subq $200, %rsp
-	movupd	%xmm0, 184(%rsp)
-	movupd	%xmm1, 168(%rsp)
-	movupd	%xmm2, 152(%rsp)
-	movupd	%xmm3, 136(%rsp)
-	movupd	%xmm4, 120(%rsp)
-	movupd	%xmm5, 104(%rsp)
-	movupd	%xmm6, 88(%rsp)
-	movupd	%xmm7, 72(%rsp)
-	movq	%rdi, 64(%rsp)
-	movq  %rax, 56(%rsp)
-	movq  %rdx, 48(%rsp)
-	movq	%rsi, 40(%rsp)
-	movq	%rcx, 32(%rsp)
-	movq	%r8, 24(%rsp)
-	movq	%r9, 16(%rsp)
+	SAVE_REGISTERS
 
 	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
 	testq %rax,%rax
@@ -148,25 +139,9 @@ __xray_FunctionTailExit:
 	callq	*%rax
 
 .Ltmp4:
-  // Restore the registers.
-	movupd	184(%rsp), %xmm0
-	movupd	168(%rsp), %xmm1
-	movupd	152(%rsp), %xmm2
-	movupd	136(%rsp), %xmm3
-	movupd	120(%rsp), %xmm4
-	movupd	104(%rsp), %xmm5
-	movupd	88(%rsp) , %xmm6
-	movupd	72(%rsp) , %xmm7
-	movq	64(%rsp), %rdi
-	movq  56(%rsp), %rax
-	movq  48(%rsp), %rdx
-	movq	40(%rsp), %rsi
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %r8
-	movq	16(%rsp), %r9
-	addq	$200, %rsp
+	RESTORE_REGISTERS
 	popq	%rbp
 	retq
 .Ltmp5:
-  .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
+	.size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
 	.cfi_endproc




More information about the llvm-commits mailing list