[llvm] Spill/Restore FP/BP under option (PR #114791)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 05:14:39 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Mahesh-Attarde (mahesh-attarde)
<details>
<summary>Changes</summary>
After change https://github.com/llvm/llvm-project/pull/81048,
there are more than 70 application runtime failures for the X86 target.
This patch moves that change under an option (`-enable-spill-fpbp`, off by default).
---
Patch is 46.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114791.diff
13 Files Affected:
- (modified) llvm/lib/CodeGen/PrologEpilogInserter.cpp (+7-1)
- (modified) llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll (+29-4)
- (modified) llvm/test/CodeGen/X86/apx/push2-pop2.ll (+151-24)
- (modified) llvm/test/CodeGen/X86/apx/pushp-popp.ll (+16-4)
- (modified) llvm/test/CodeGen/X86/avx512-intel-ocl.ll (+191-8)
- (modified) llvm/test/CodeGen/X86/clobber_base_ptr.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr2.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/i386-baseptr.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll (+28-2)
- (modified) llvm/test/CodeGen/X86/x86-32-intrcc.ll (+1-3)
- (modified) llvm/test/CodeGen/X86/x86-64-baseptr.ll (+182-12)
``````````diff
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index ee03eaa8ae527c..da9385ce4c96d4 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,6 +77,11 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+// Experimental Feature enables spilling and reload FP/BP
+static cl::opt<bool>
+ EnableSpillFPBP("enable-spill-fpbp",
+ cl::desc("Spill clobbered fp register to stack."),
+ cl::init(false), cl::Hidden);
namespace {
@@ -231,7 +236,8 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
// Spill frame pointer and/or base pointer registers if they are clobbered.
// It is placed before call frame instruction elimination so it will not mess
// with stack arguments.
- TFI->spillFPBP(MF);
+ if (EnableSpillFPBP)
+ TFI->spillFPBP(MF);
// Calculate the MaxCallFrameSize value for the function's frame
// information. Also eliminates call frame pseudo instructions.
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
index f20c4c1ae27867..b4b205bd292276 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
@@ -2,6 +2,7 @@
; Check PUSH2/POP2 is not used for vector registers
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
define void @widget(float %arg) nounwind {
; CHECK-LABEL: widget:
@@ -43,17 +44,41 @@ define void @widget(float %arg) nounwind {
; FRAME-NEXT: xorl %r8d, %r8d
; FRAME-NEXT: callq *%rsi
; FRAME-NEXT: movss %xmm6, 0
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; FRAME-NEXT: addq $48, %rsp
; FRAME-NEXT: pop2 %r15, %rsi
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: widget:
+; FRAME-SPILL: # %bb.0: # %bb
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: push2 %rsi, %r15
+; FRAME-SPILL-NEXT: subq $48, %rsp
+; FRAME-SPILL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; FRAME-SPILL-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; FRAME-SPILL-NEXT: movaps %xmm0, %xmm6
+; FRAME-SPILL-NEXT: xorl %esi, %esi
+; FRAME-SPILL-NEXT: xorl %ecx, %ecx
+; FRAME-SPILL-NEXT: callq *%rsi
+; FRAME-SPILL-NEXT: xorl %ecx, %ecx
+; FRAME-SPILL-NEXT: xorl %edx, %edx
+; FRAME-SPILL-NEXT: xorl %r8d, %r8d
+; FRAME-SPILL-NEXT: callq *%rsi
+; FRAME-SPILL-NEXT: movss %xmm6, 0
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; FRAME-SPILL-NEXT: addq $48, %rsp
+; FRAME-SPILL-NEXT: pop2 %r15, %rsi
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
bb:
%call = tail call float null(ptr null)
%call1 = tail call i32 null(ptr null, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
index f5be484be2b1a6..d6bb1a24aa6b7b 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2,+ppx | FileCheck %s --check-prefix=PPX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
define void @csr1() nounwind {
; CHECK-LABEL: csr1:
@@ -24,14 +25,23 @@ define void @csr1() nounwind {
; FRAME: # %bb.0: # %entry
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr1:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -63,15 +73,26 @@ define void @csr2() nounwind {
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: pushq %r15
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %r15
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr2:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: pushq %r15
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %r15
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -103,15 +124,26 @@ define void @csr3() nounwind {
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr3:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -148,16 +180,29 @@ define void @csr4() nounwind {
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: pushq %r13
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %r13
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr4:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: pushq %r13
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %r13
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -194,16 +239,29 @@ define void @csr5() nounwind {
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: push2 %r12, %r13
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: pop2 %r13, %r12
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr5:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: push2 %r12, %r13
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: pop2 %r13, %r12
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -245,17 +303,32 @@ define void @csr6() nounwind {
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: push2 %r12, %r13
; FRAME-NEXT: pushq %rbx
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %rbx
; FRAME-NEXT: pop2 %r13, %r12
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr6:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: push2 %r12, %r13
+; FRAME-SPILL-NEXT: pushq %rbx
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %rbx
+; FRAME-SPILL-NEXT: pop2 %r13, %r12
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{rbx},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -421,6 +494,60 @@ define void @lea_in_epilog(i1 %arg, ptr %arg1, ptr %arg2, i64 %arg3, i64 %arg4,
; FRAME-NEXT: movq $0, (%rax)
; FRAME-NEXT: .LBB6_5: # %bb14
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: lea_in_epilog:
+; FRAME-SPILL: # %bb.0: # %bb
+; FRAME-SPILL-NEXT: testb $1, %dil
+; FRAME-SPILL-NEXT: je .LBB6_5
+; FRAME-SPILL-NEXT: # %bb.1: # %bb13
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: push2 %r12, %r13
+; FRAME-SPILL-NEXT: pushq %rbx
+; FRAME-SPILL-NEXT: subq $24, %rsp
+; FRAME-SPILL-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; FRAME-SPILL-NEXT: addq 16(%rbp), %r9
+; FRAME-SPILL-NEXT: movq 48(%rbp), %rbx
+; FRAME-SPILL-NEXT: addq %r9, %rbx
+; FRAME-SPILL-NEXT: movq 40(%rbp), %r12
+; FRAME-SPILL-NEXT: addq %r9, %r12
+; FRAME-SPILL-NEXT: movq 32(%rbp), %r15
+; FRAME-SPILL-NEXT: addq %r9, %r15
+; FRAME-SPILL-NEXT: xorl %r13d, %r13d
+; FRAME-SPILL-NEXT: xorl %r14d, %r14d
+; FRAME-SPILL-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; FRAME-SPILL-NEXT: .p2align 4
+; FRAME-SPILL-NEXT: .LBB6_2: # %bb15
+; FRAME-SPILL-NEXT: # =>This Inner Loop Header: Depth=1
+; FRAME-SPILL-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; FRAME-SPILL-NEXT: incq %r14
+; FRAME-SPILL-NEXT: movl $432, %edx # imm = 0x1B0
+; FRAME-SPILL-NEXT: xorl %edi, %edi
+; FRAME-SPILL-NEXT: movq %r12, %rsi
+; FRAME-SPILL-NEXT: callq memcpy@PLT
+; FRAME-SPILL-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload
+; FRAME-SPILL-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; FRAME-SPILL-NEXT: movq 16(%rbp), %rax
+; FRAME-SPILL-NEXT: addq %rax, %rbx
+; FRAME-SPILL-NEXT: addq %rax, %r12
+; FRAME-SPILL-NEXT: addq %rax, %r15
+; FRAME-SPILL-NEXT: addq %rax, %r9
+; FRAME-SPILL-NEXT: addq $8, %r13
+; FRAME-SPILL-NEXT: testb $1, %dil
+; FRAME-SPILL-NEXT: je .LBB6_2
+; FRAME-SPILL-NEXT: # %bb.3: # %bb11
+; FRAME-SPILL-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; FRAME-SPILL-NEXT: leaq {{[0-9]+}}(%rsp), %rsp
+; FRAME-SPILL-NEXT: popq %rbx
+; FRAME-SPILL-NEXT: pop2 %r13, %r12
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: jne .LBB6_5
+; FRAME-SPILL-NEXT: # %bb.4: # %bb12
+; FRAME-SPILL-NEXT: movq $0, (%rax)
+; FRAME-SPILL-NEXT: .LBB6_5: # %bb14
+; FRAME-SPILL-NEXT: retq
bb:
br i1 %arg, label %bb13, label %bb14
diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
index 625e70b07198e8..4097c59c56437b 100644
--- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll
+++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
define void @csr2() nounwind {
; CHECK-LABEL: csr2:
@@ -18,15 +19,26 @@ define void @csr2() nounwind {
; FRAME-NEXT: pushp %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: pushp %r15
-; FRAME-NEXT: pushp %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popp %rbp
; FRAME-NEXT: popp %r15
; FRAME-NEXT: popp %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr2:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushp %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: pushp %r15
+; FRAME-SPILL-NEXT: pushp %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popp %rbp
+; FRAME-SPILL-NEXT: popp %r15
+; FRAME-SPILL-NEXT: popp %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
ret void
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 6c68279b8d04ae..3873b920d4de69 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -6,7 +6,9 @@
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --enable-spill-fpbp=true | FileCheck %s -check-prefixes=X64-SPILL,X64-SPILL-KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --enable-spill-fpbp=true | FileCheck %s -check-prefixes=X64-SPILL,X64-SPILL-SKX
declare <16 x float> @func_float16_ptr(<16 x float>, ptr)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
@@ -70,17 +72,35 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X64-NEXT: movq %rsp, %rdi
-; X64-NEXT: pushq %rbp
-; X64-NEXT: pushq %rax
; X64-NEXT: callq _func_float16_ptr
-; X64-NEXT: addq $8, %rsp
-; X64-NEXT: popq %rbp
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %rbp
; X64-NEXT: retq
+;
+; X64-SPILL-LABEL: testf16_inp:
+; X64-SPILL: ## %bb.0:
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: movq %rsp, %rbp
+; X64-SPILL-NEXT: pushq %r13
+; X64-SPILL-NEXT: pushq %r12
+; X64-SPILL-NEXT: andq $-64, %rsp
+; X64-SPILL-NEXT: subq $128, %rsp
+; X64-SPILL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-SPILL-NEXT: movq %rsp, %rdi
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: pushq %rax
+; X64-SPILL-NEXT: callq _func_float16_ptr
+; X64-SPILL-NEXT: addq $8, %rsp
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: vaddps (%rsp), %zmm0, %zmm0
+; X64-SPILL-NEXT: leaq -16(%rbp), %rsp
+; X64-SPILL-NEXT: popq %r12
+; X64-SPILL-NEXT: popq %r13
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: retq
%y = alloca <16 x float>, align 64
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, ptr %y)
@@ -154,11 +174,7 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: vmovaps %zmm1, %zmm16
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X64-NEXT: movq %rsp, %rdi
-; X64-NEXT: pushq %rbp
-; X64-NEXT: pushq %rax
; X64-NEXT: callq _func_float16_ptr
-; X64-NEXT: addq $8, %rsp
-; X64-NEXT: popq %rbp
; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
@@ -166,6 +182,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: popq %r13
; X64-NEXT: popq %rbp
; X64-NEXT: retq
+;
+; X64-SPILL-LABEL: testf16_regs:
+; X64-SPILL: ## %bb.0:
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: movq %rsp, %rbp
+; X64-SPILL-NEXT: pushq %r13
+; X64-SPILL-NEXT: pushq %r12
+; X64-SPILL-NEXT: andq $-64, %rsp
+; X64-SPILL-NEXT: subq $128, %rsp
+; X64-SPILL-NEXT: vmovaps %zmm1, %zmm16
+; X64-SPILL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-SPILL-NEXT: movq %rsp, %rdi
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: pushq %rax
+; X64-SPILL-NEXT: callq _func_float16_ptr
+; X64-SPILL-NEXT: addq $8, %rsp
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: vaddps %zmm16, %zmm0, %zmm0
+; X64-SPILL-NEXT: vaddps (%rsp), %zmm0, %zmm0
+; X64-SPILL-NEXT: leaq -16(%rbp), %rsp
+; X64-SPILL-NEXT: popq %r12
+; X64-SPILL-NEXT: popq %r13
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: retq
%y = alloca <16 x float>, align 64
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, ptr %y)
@@ -348,6 +388,55 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; X64-KNL-NEXT: popq %rsi
; X64-KNL-NEXT: retq
;
+; X64-SPILL-KNL-LABEL: test_prolog_epilog:
+; X64-SPILL-KNL: ## %bb.0:
+; X64-SPILL-KNL-NEXT: pushq %rsi
+; X64-SPILL-KNL-NEXT: subq $1072, %rsp ## imm = 0x430
+; X64-SPILL-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byt...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/114791
More information about the llvm-commits
mailing list