[llvm] 0276fa8 - [X86][ABI] Don't preserve return regs for preserve_all/preserve_most CCs

Anton Bikineev via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 23 04:32:29 PST 2023


Author: Anton Bikineev
Date: 2023-01-23T13:32:17+01:00
New Revision: 0276fa89d7a4dbe73105c9148f947716b3d8f17f

URL: https://github.com/llvm/llvm-project/commit/0276fa89d7a4dbe73105c9148f947716b3d8f17f
DIFF: https://github.com/llvm/llvm-project/commit/0276fa89d7a4dbe73105c9148f947716b3d8f17f.diff

LOG: [X86][ABI] Don't preserve return regs for preserve_all/preserve_most CCs

Currently both calling conventions preserve registers that are used to
store a return value. This causes the returned value to be lost:

  define i32 @bar() {
    %1 = call preserve_mostcc i32 @foo()
    ret i32 %1
  }

  define preserve_mostcc i32 @foo() {
    ret i32 2
    ; preserve_mostcc will restore %rax to whatever it was
    ; before the call, discarding the returned value.
  }

This contradicts the current documentation (preserve_allcc "behaves
identical to the `C` calling conventions on how arguments and return
values are passed") and also breaks [[clang::preserve_most]].

This change makes CSRs be preserved iff they are not used to store a
return value (e.g. %rax for scalars, {%rax:%rdx} for __int128, %xmm0
for double). For void functions the set of preserved registers is
unchanged, i.e. the behaviour is backward compatible with existing
code.

Differential Revision: https://reviews.llvm.org/D141020

Added: 
    llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll
    llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll

Modified: 
    llvm/docs/LangRef.rst
    llvm/lib/Target/X86/X86CallingConv.td
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/preserve_allcc64.ll
    llvm/test/CodeGen/X86/preserve_mostcc64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2ddb05c149df..dd5bcb8c78eb 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -366,8 +366,9 @@ added in the future:
     apply for values returned in callee-saved registers.
 
     - On X86-64 the callee preserves all general purpose registers, except for
-      R11. R11 can be used as a scratch register. Floating-point registers
-      (XMMs/YMMs) are not preserved and need to be saved by the caller.
+      R11 and return registers, if any. R11 can be used as a scratch register.
+      Floating-point registers (XMMs/YMMs) are not preserved and need to be
+      saved by the caller.
 
     The idea behind this convention is to support calls to runtime functions
     that have a hot path and a cold path. The hot path is usually a small piece

diff  --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 4dd8a6cdd898..c92a30804014 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1154,11 +1154,11 @@ def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>;
 // CSRs that are handled explicitly via copies.
 def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;
 
-// All GPRs - except r11
+// All GPRs - except r11 and return registers.
 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
                                               R8, R9, R10)>;
 
-// All registers - except r11
+// All registers - except r11 and return registers.
 def CSR_64_RT_AllRegs     : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
                                                  (sequence "XMM%u", 0, 15))>;
 def CSR_64_RT_AllRegs_AVX : CalleeSavedRegs<(add CSR_64_RT_MostRegs,

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 70c7a0825b2c..2ea9e68dc17f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -104,6 +104,20 @@ static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
       DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
 }
 
+/// Returns true if a CC can dynamically exclude a register from the list of
+/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
+/// params/returns.
+static bool shouldDisableCalleeSavedRegisterCC(CallingConv::ID CC) {
+  switch (CC) {
+  default:
+    return false;
+  case CallingConv::X86_RegCall:
+  case CallingConv::PreserveMost:
+  case CallingConv::PreserveAll:
+    return true;
+  }
+}
+
 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                      const X86Subtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -3167,7 +3181,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   // In some cases we need to disable registers from the default CSR list.
   // For example, when they are used for argument passing.
   bool ShouldDisableCalleeSavedRegister =
-      CallConv == CallingConv::X86_RegCall ||
+      shouldDisableCalleeSavedRegisterCC(CallConv) ||
       MF.getFunction().hasFnAttribute("no_caller_saved_registers");
 
   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
@@ -4319,7 +4333,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
     }
   }
 
-  if (CallConv == CallingConv::X86_RegCall ||
+  if (shouldDisableCalleeSavedRegisterCC(CallConv) ||
       F.hasFnAttribute("no_caller_saved_registers")) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
     for (std::pair<Register, Register> Pair : MRI.liveins())
@@ -4880,7 +4894,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // In some calling conventions we need to remove the used physical registers
   // from the reg mask.
-  if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
+  if (shouldDisableCalleeSavedRegisterCC(CallConv) || HasNCSR) {
     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 
     // Allocate a new Reg Mask and copy Mask.

diff  --git a/llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll b/llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll
new file mode 100644
index 000000000000..517d22edb2e6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preserve_allcc64-ret-double.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7     | FileCheck --check-prefixes=ALL,SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX %s
+
+define preserve_allcc double @preserve_allcc1() nounwind {
+entry:
+;ALL-LABEL:   preserve_allcc1
+;SSE:         movaps %xmm1
+;SSE-NOT:     movaps %xmm0
+;AVX:         vmovups %ymm1
+;AVX-NOT:     vmovups %ymm0
+;SSE-NOT:     movaps {{.*}} %xmm0
+;SSE:         movaps {{.*}} %xmm1
+;AVX-NOT:     vmovups {{.*}} %ymm0
+;AVX:         vmovups {{.*}} %ymm1
+  call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
+  ret double 0.
+}
+
+; Make sure XMM0 (return register) and R11 are saved before the call
+declare preserve_allcc double @bar_double(i64, i64)
+define void @preserve_allcc2() nounwind {
+entry:
+;SSE-LABEL: preserve_allcc2
+;SSE:       movq %r11, [[REG1:%[a-z0-9]+]]
+;SSE:       movaps %xmm0, [[REG2:[-0-9]*\(%r[sb]p\)]]
+;SSE:       movq [[REG1]], %r11
+;SSE:       movaps [[REG2]], %xmm0
+  %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
+  %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
+  %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
+  %a3 = call i64 asm sideeffect "", "={r8}"() nounwind
+  %a4 = call i64 asm sideeffect "", "={r9}"() nounwind
+  %a5 = call i64 asm sideeffect "", "={r10}"() nounwind
+  %a6 = call i64 asm sideeffect "", "={r11}"() nounwind
+  %a10 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind
+  %a11 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind
+  %a12 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind
+  %a13 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind
+  %a14 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind
+  %a15 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind
+  %a16 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind
+  %a17 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind
+  %a18 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind
+  %a19 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind
+  %a20 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind
+  %a21 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind
+  %a22 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind
+  %a23 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
+  %a24 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
+  %a25 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
+  call preserve_allcc double @bar_double(i64 1, i64 2)
+  call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23, <2 x double> %a24, <2 x double> %a25)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/preserve_allcc64.ll b/llvm/test/CodeGen/X86/preserve_allcc64.ll
index 545cd36ab957..c19040738ee3 100644
--- a/llvm/test/CodeGen/X86/preserve_allcc64.ll
+++ b/llvm/test/CodeGen/X86/preserve_allcc64.ll
@@ -1,82 +1,138 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7     | FileCheck --check-prefix=SSE %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,VOID %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,INT %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,SSE,INT128 %s
+;
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,VOID %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,INT %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefixes=ALL,AVX,INT128 %s
 
-define preserve_allcc void @preserve_allcc1() nounwind {
+define preserve_allcc RETTYPE @preserve_allcc1() nounwind {
 entry:
-;SSE-LABEL: preserve_allcc1
-;SSE:       pushq %r10
-;SSE-NEXT:  pushq %r9
-;SSE-NEXT:  pushq %r8
-;SSE-NEXT:  pushq %rdi
-;SSE-NEXT:  pushq %rsi
-;SSE-NEXT:  pushq %rdx
-;SSE-NEXT:  pushq %rcx
-;SSE-NEXT:  pushq %rax
-;SSE-NEXT:  pushq %rbp
-;SSE-NEXT:  pushq %r15
-;SSE-NEXT:  pushq %r14
-;SSE-NEXT:  pushq %r13
-;SSE-NEXT:  pushq %r12
-;SSE-NEXT:  pushq %rbx
-;SSE:       movaps %xmm15
-;SSE-NEXT:  movaps %xmm14
-;SSE-NEXT:  movaps %xmm13
-;SSE-NEXT:  movaps %xmm12
-;SSE-NEXT:  movaps %xmm11
-;SSE-NEXT:  movaps %xmm10
-;SSE-NEXT:  movaps %xmm9
-;SSE-NEXT:  movaps %xmm8
-;SSE-NEXT:  movaps %xmm7
-;SSE-NEXT:  movaps %xmm6
-;SSE-NEXT:  movaps %xmm5
-;SSE-NEXT:  movaps %xmm4
-;SSE-NEXT:  movaps %xmm3
-;SSE-NEXT:  movaps %xmm2
-;SSE-NEXT:  movaps %xmm1
-;SSE-NEXT:  movaps %xmm0
-;AVX-LABEL: preserve_allcc1
-;AVX:       pushq %r10
-;AVX-NEXT:  pushq %r9
-;AVX-NEXT:  pushq %r8
-;AVX-NEXT:  pushq %rdi
-;AVX-NEXT:  pushq %rsi
-;AVX-NEXT:  pushq %rdx
-;AVX-NEXT:  pushq %rcx
-;AVX-NEXT:  pushq %rax
-;AVX-NEXT:  pushq %rbp
-;AVX-NEXT:  pushq %r15
-;AVX-NEXT:  pushq %r14
-;AVX-NEXT:  pushq %r13
-;AVX-NEXT:  pushq %r12
-;AVX-NEXT:  pushq %rbx
-;AVX:       vmovups %ymm15
-;AVX-NEXT:  vmovups %ymm14
-;AVX-NEXT:  vmovups %ymm13
-;AVX-NEXT:  vmovups %ymm12
-;AVX-NEXT:  vmovups %ymm11
-;AVX-NEXT:  vmovups %ymm10
-;AVX-NEXT:  vmovups %ymm9
-;AVX-NEXT:  vmovups %ymm8
-;AVX-NEXT:  vmovups %ymm7
-;AVX-NEXT:  vmovups %ymm6
-;AVX-NEXT:  vmovups %ymm5
-;AVX-NEXT:  vmovups %ymm4
-;AVX-NEXT:  vmovups %ymm3
-;AVX-NEXT:  vmovups %ymm2
-;AVX-NEXT:  vmovups %ymm1
-;AVX-NEXT:  vmovups %ymm0
+;ALL-LABEL:   preserve_allcc1
+;ALL:         pushq %r10
+;ALL-NEXT:    pushq %r9
+;ALL-NEXT:    pushq %r8
+;ALL-NEXT:    pushq %rdi
+;ALL-NEXT:    pushq %rsi
+;VOID-NEXT:   pushq %rdx
+;INT-NEXT:    pushq %rdx
+;INT128-NOT:  pushq %rdx
+;ALL-NEXT:    pushq %rcx
+;VOID-NEXT:   pushq %rax
+;INT-NOT:     pushq %rax
+;INT128-NOT:  pushq %rax
+;ALL-NEXT:    pushq %rbp
+;ALL-NEXT:    pushq %r15
+;ALL-NEXT:    pushq %r14
+;ALL-NEXT:    pushq %r13
+;ALL-NEXT:    pushq %r12
+;ALL-NEXT:    pushq %rbx
+;SSE:         movaps %xmm15
+;SSE-NEXT:    movaps %xmm14
+;SSE-NEXT:    movaps %xmm13
+;SSE-NEXT:    movaps %xmm12
+;SSE-NEXT:    movaps %xmm11
+;SSE-NEXT:    movaps %xmm10
+;SSE-NEXT:    movaps %xmm9
+;SSE-NEXT:    movaps %xmm8
+;SSE-NEXT:    movaps %xmm7
+;SSE-NEXT:    movaps %xmm6
+;SSE-NEXT:    movaps %xmm5
+;SSE-NEXT:    movaps %xmm4
+;SSE-NEXT:    movaps %xmm3
+;SSE-NEXT:    movaps %xmm2
+;SSE-NEXT:    movaps %xmm1
+;SSE-NEXT:    movaps %xmm0
+;AVX:         vmovups %ymm15
+;AVX-NEXT:    vmovups %ymm14
+;AVX-NEXT:    vmovups %ymm13
+;AVX-NEXT:    vmovups %ymm12
+;AVX-NEXT:    vmovups %ymm11
+;AVX-NEXT:    vmovups %ymm10
+;AVX-NEXT:    vmovups %ymm9
+;AVX-NEXT:    vmovups %ymm8
+;AVX-NEXT:    vmovups %ymm7
+;AVX-NEXT:    vmovups %ymm6
+;AVX-NEXT:    vmovups %ymm5
+;AVX-NEXT:    vmovups %ymm4
+;AVX-NEXT:    vmovups %ymm3
+;AVX-NEXT:    vmovups %ymm2
+;AVX-NEXT:    vmovups %ymm1
+;AVX-NEXT:    vmovups %ymm0
+;SSE:         movaps {{.*}} %xmm0
+;SSE-NEXT:    movaps {{.*}} %xmm1
+;SSE-NEXT:    movaps {{.*}} %xmm2
+;SSE-NEXT:    movaps {{.*}} %xmm3
+;SSE-NEXT:    movaps {{.*}} %xmm4
+;SSE-NEXT:    movaps {{.*}} %xmm5
+;SSE-NEXT:    movaps {{.*}} %xmm6
+;SSE-NEXT:    movaps {{.*}} %xmm7
+;SSE-NEXT:    movaps {{.*}} %xmm8
+;SSE-NEXT:    movaps {{.*}} %xmm9
+;SSE-NEXT:    movaps {{.*}} %xmm10
+;SSE-NEXT:    movaps {{.*}} %xmm11
+;SSE-NEXT:    movaps {{.*}} %xmm12
+;SSE-NEXT:    movaps {{.*}} %xmm13
+;SSE-NEXT:    movaps {{.*}} %xmm14
+;SSE-NEXT:    movaps {{.*}} %xmm15
+;AVX:         vmovups {{.*}} %ymm0
+;AVX-NEXT:    vmovups {{.*}} %ymm1
+;AVX-NEXT:    vmovups {{.*}} %ymm2
+;AVX-NEXT:    vmovups {{.*}} %ymm3
+;AVX-NEXT:    vmovups {{.*}} %ymm4
+;AVX-NEXT:    vmovups {{.*}} %ymm5
+;AVX-NEXT:    vmovups {{.*}} %ymm6
+;AVX-NEXT:    vmovups {{.*}} %ymm7
+;AVX-NEXT:    vmovups {{.*}} %ymm8
+;AVX-NEXT:    vmovups {{.*}} %ymm9
+;AVX-NEXT:    vmovups {{.*}} %ymm10
+;AVX-NEXT:    vmovups {{.*}} %ymm11
+;AVX-NEXT:    vmovups {{.*}} %ymm12
+;AVX-NEXT:    vmovups {{.*}} %ymm13
+;AVX-NEXT:    vmovups {{.*}} %ymm14
+;AVX-NEXT:    vmovups {{.*}} %ymm15
+;ALL:         popq    %rbx
+;ALL-NEXT:    popq    %r12
+;ALL-NEXT:    popq    %r13
+;ALL-NEXT:    popq    %r14
+;ALL-NEXT:    popq    %r15
+;ALL-NEXT:    popq    %rbp
+;VOID-NEXT:   popq    %rax
+;INT-NOT:     popq    %rax
+;INT128-NOT:  popq    %rax
+;ALL-NEXT:    popq    %rcx
+;VOID-NEXT:   popq    %rdx
+;INT-NEXT:    popq    %rdx
+;INT128-NOT:  popq    %rdx
+;ALL-NEXT:    popq    %rsi
+;ALL-NEXT:    popq    %rdi
+;ALL-NEXT:    popq    %r8
+;ALL-NEXT:    popq    %r9
+;ALL-NEXT:    popq    %r10
   call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
-  ret void
+  ret RETTYPE RETVAL
 }
 
-; Make sure only R11 is saved before the call
-declare preserve_allcc void @bar(i64, i64, double, double)
+; Make sure R11 and return registers are saved before the call
+declare preserve_allcc RETTYPE @bar(i64, i64, double, double)
 define void @preserve_allcc2() nounwind {
 entry:
-;SSE-LABEL: preserve_allcc2
-;SSE:       movq %r11, [[REG:%[a-z0-9]+]]
-;SSE-NOT:   movaps %xmm
-;SSE:       movq [[REG]], %r11
+;ALL-LABEL: preserve_allcc2
+;VOID-NOT:  movq %rax, [[REG1:%[a-z0-9]+]]
+;INT:       movq %rax, [[REG1:%[a-z0-9]+]]
+;INT128:    movq %rax, [[REG1:%[a-z0-9]+]]
+;VOID-NOT:  movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT-NOT:   movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT128:    movq %rdx, [[REG2:%[a-z0-9]+]]
+;ALL:       movq %r11, [[REG3:%[a-z0-9]+]]
+;ALL-NOT:   movaps %xmm
+;VOID-NOT:  movq {{.*}}, %rax
+;INT:       movq [[REG1]], %rax
+;INT128:    movq [[REG1]], %rax
+;VOID-NOT:  movq {{.*}}, %rdx
+;INT-NOT:   movq {{.*}}, %rdx
+;INT128:    movq [[REG2]], %rdx
+;ALL:       movq [[REG3]], %r11
   %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
   %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
   %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
@@ -98,7 +154,7 @@ entry:
   %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
   %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
   %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
-  call preserve_allcc void @bar(i64 1, i64 2, double 3.0, double 4.0)
+  call preserve_allcc RETTYPE @bar(i64 1, i64 2, double 3.0, double 4.0)
   call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23)
   ret void
 }

diff  --git a/llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll b/llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll
new file mode 100644
index 000000000000..99740d3b31c0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preserve_mostcc64-ret-double.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure XMM0 (return register) and R11 are saved before the call
+declare preserve_mostcc double @foo_double(i64, i64)
+define void @preserve_mostcc1() nounwind {
+entry:
+;CHECK-LABEL: preserve_mostcc1
+;CHECK:       movq %r11, [[REG1:%[a-z0-9]+]]
+;CHECK:       movaps %xmm0, [[REG2:[-0-9]*\(%r[sb]p\)]]
+;CHECK:       call
+;CHECK:       movq [[REG1]], %r11
+;CHECK:       movaps [[REG2]], %xmm0
+  %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
+  %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
+  %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
+  %a3 = call i64 asm sideeffect "", "={r8}"() nounwind
+  %a4 = call i64 asm sideeffect "", "={r9}"() nounwind
+  %a5 = call i64 asm sideeffect "", "={r10}"() nounwind
+  %a6 = call i64 asm sideeffect "", "={r11}"() nounwind
+  %a10 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind
+  %a11 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind
+  %a12 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind
+  %a13 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind
+  %a14 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind
+  %a15 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind
+  %a16 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind
+  %a17 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind
+  %a18 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind
+  %a19 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind
+  %a20 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind
+  %a21 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind
+  %a22 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind
+  %a23 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
+  %a24 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
+  %a25 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
+  call preserve_mostcc double @foo_double(i64 1, i64 2)
+  call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23, <2 x double> %a24, <2 x double> %a25)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/preserve_mostcc64.ll b/llvm/test/CodeGen/X86/preserve_mostcc64.ll
index 4ee293e14304..968e3fd78a0f 100644
--- a/llvm/test/CodeGen/X86/preserve_mostcc64.ll
+++ b/llvm/test/CodeGen/X86/preserve_mostcc64.ll
@@ -1,64 +1,99 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7     | FileCheck --check-prefix=SSE %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,VOID %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT128 %s
 
-; Every GPR should be saved - except r11
-define preserve_mostcc void @preserve_mostcc1() nounwind {
+; Every GPR should be saved - except r11 and return registers
+define preserve_mostcc RETTYPE @preserve_mostcc1() nounwind {
 entry:
-;SSE-LABEL: preserve_mostcc1
-;SSE:       pushq %r10
-;SSE-NEXT:  pushq %r9
-;SSE-NEXT:  pushq %r8
-;SSE-NEXT:  pushq %rdi
-;SSE-NEXT:  pushq %rsi
-;SSE-NEXT:  pushq %rdx
-;SSE-NEXT:  pushq %rcx
-;SSE-NEXT:  pushq %rax
-;SSE-NEXT:  pushq %rbp
-;SSE-NEXT:  pushq %r15
-;SSE-NEXT:  pushq %r14
-;SSE-NEXT:  pushq %r13
-;SSE-NEXT:  pushq %r12
-;SSE-NEXT:  pushq %rbx
-;AVX-LABEL: preserve_mostcc1
-;AVX:       pushq %r10
-;AVX-NEXT:  pushq %r9
-;AVX-NEXT:  pushq %r8
-;AVX-NEXT:  pushq %rdi
-;AVX-NEXT:  pushq %rsi
-;AVX-NEXT:  pushq %rdx
-;AVX-NEXT:  pushq %rcx
-;AVX-NEXT:  pushq %rax
-;AVX-NEXT:  pushq %rbp
-;AVX-NEXT:  pushq %r15
-;AVX-NEXT:  pushq %r14
-;AVX-NEXT:  pushq %r13
-;AVX-NEXT:  pushq %r12
-;AVX-NEXT:  pushq %rbx
+;ALL-LABEL:   preserve_mostcc1
+;ALL:         pushq %r10
+;ALL-NEXT:    pushq %r9
+;ALL-NEXT:    pushq %r8
+;ALL-NEXT:    pushq %rdi
+;ALL-NEXT:    pushq %rsi
+;VOID-NEXT:   pushq %rdx
+;INT-NEXT:    pushq %rdx
+;INT128-NOT:  pushq %rdx
+;ALL-NEXT:    pushq %rcx
+;VOID-NEXT:   pushq %rax
+;INT-NOT:     pushq %rax
+;INT128-NOT:  pushq %rax
+;ALL-NEXT:    pushq %rbp
+;ALL-NEXT:    pushq %r15
+;ALL-NEXT:    pushq %r14
+;ALL-NEXT:    pushq %r13
+;ALL-NEXT:    pushq %r12
+;ALL-NEXT:    pushq %rbx
+;ALL:         popq    %rbx
+;ALL-NEXT:    popq    %r12
+;ALL-NEXT:    popq    %r13
+;ALL-NEXT:    popq    %r14
+;ALL-NEXT:    popq    %r15
+;ALL-NEXT:    popq    %rbp
+;VOID-NEXT:   popq    %rax
+;INT-NOT:     popq    %rax
+;INT128-NOT:  popq    %rax
+;ALL-NEXT:    popq    %rcx
+;VOID-NEXT:   popq    %rdx
+;INT-NEXT:    popq    %rdx
+;INT128-NOT:  popq    %rdx
+;ALL-NEXT:    popq    %rsi
+;ALL-NEXT:    popq    %rdi
+;ALL-NEXT:    popq    %r8
+;ALL-NEXT:    popq    %r9
+;ALL-NEXT:    popq    %r10
   call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
-  ret void
+  ret RETTYPE RETVAL
 }
 
-; Make sure R11 and XMMs are saved before the call
-declare preserve_mostcc void @foo(i64, i64, double, double)
+; Make sure R11, return registers and XMMs are saved before the call
+declare preserve_mostcc RETTYPE @foo(i64, i64, double, double)
 define void @preserve_mostcc2() nounwind {
 entry:
-;SSE-LABEL: preserve_mostcc2
-;SSE:       movq %r11, [[REG:%[a-z0-9]+]]
-;SSE:       movaps %xmm2
-;SSE:       movaps %xmm3
-;SSE:       movaps %xmm4
-;SSE:       movaps %xmm5
-;SSE:       movaps %xmm6
-;SSE:       movaps %xmm7
-;SSE:       movaps %xmm8
-;SSE:       movaps %xmm9
-;SSE:       movaps %xmm10
-;SSE:       movaps %xmm11
-;SSE:       movaps %xmm12
-;SSE:       movaps %xmm13
-;SSE:       movaps %xmm14
-;SSE:       movaps %xmm15
-;SSE:       movq [[REG]], %r11
+;ALL-LABEL: preserve_mostcc2
+;VOID-NOT:  movq %rax, [[REG1:%[a-z0-9]+]]
+;INT:       movq %rax, [[REG1:%[a-z0-9]+]]
+;INT128:    movq %rax, [[REG1:%[a-z0-9]+]]
+;VOID-NOT:  movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT-NOT:   movq %rdx, [[REG2:%[a-z0-9]+]]
+;INT128:    movq %rdx, [[REG2:%[a-z0-9]+]]
+;ALL:       movq %r11, [[REG3:%[a-z0-9]+]]
+;ALL:       movaps %xmm2
+;ALL:       movaps %xmm3
+;ALL:       movaps %xmm4
+;ALL:       movaps %xmm5
+;ALL:       movaps %xmm6
+;ALL:       movaps %xmm7
+;ALL:       movaps %xmm8
+;ALL:       movaps %xmm9
+;ALL:       movaps %xmm10
+;ALL:       movaps %xmm11
+;ALL:       movaps %xmm12
+;ALL:       movaps %xmm13
+;ALL:       movaps %xmm14
+;ALL:       movaps %xmm15
+;ALL:       call
+;VOID-NOT:  movq {{.*}}, %rax
+;INT:       movq [[REG1]], %rax
+;INT128:    movq [[REG1]], %rax
+;VOID-NOT:  movq {{.*}}, %rdx
+;INT-NOT:   movq {{.*}}, %rdx
+;INT128:    movq [[REG2]], %rdx
+;ALL:       movq [[REG3]], %r11
+;ALL:       movaps {{.*}} %xmm2
+;ALL:       movaps {{.*}} %xmm3
+;ALL:       movaps {{.*}} %xmm4
+;ALL:       movaps {{.*}} %xmm5
+;ALL:       movaps {{.*}} %xmm6
+;ALL:       movaps {{.*}} %xmm7
+;ALL:       movaps {{.*}} %xmm8
+;ALL:       movaps {{.*}} %xmm9
+;ALL:       movaps {{.*}} %xmm10
+;ALL:       movaps {{.*}} %xmm11
+;ALL:       movaps {{.*}} %xmm12
+;ALL:       movaps {{.*}} %xmm13
+;ALL:       movaps {{.*}} %xmm14
+;ALL:       movaps {{.*}} %xmm15
   %a0 = call i64 asm sideeffect "", "={rax}"() nounwind
   %a1 = call i64 asm sideeffect "", "={rcx}"() nounwind
   %a2 = call i64 asm sideeffect "", "={rdx}"() nounwind
@@ -80,7 +115,7 @@ entry:
   %a21 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
   %a22 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
   %a23 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
-  call preserve_mostcc void @foo(i64 1, i64 2, double 3.0, double 4.0)
+  call preserve_mostcc RETTYPE @foo(i64 1, i64 2, double 3.0, double 4.0)
   call void asm sideeffect "", "{rax},{rcx},{rdx},{r8},{r9},{r10},{r11},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15, <2 x double> %a16, <2 x double> %a17, <2 x double> %a18, <2 x double> %a19, <2 x double> %a20, <2 x double> %a21, <2 x double> %a22, <2 x double> %a23)
   ret void
 }


        


More information about the llvm-commits mailing list