[llvm] Spill/Restore FP/BP under option (PR #114791)

Mon Nov 4 05:14:39 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Mahesh-Attarde (mahesh-attarde)

<details>
<summary>Changes</summary>

Since the change in https://github.com/llvm/llvm-project/pull/81048,
more than 70 applications fail at runtime on the X86 target.
This patch moves that change under an option (`-enable-spill-fpbp`, off by default).

---

Patch is 46.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114791.diff


13 Files Affected:

- (modified) llvm/lib/CodeGen/PrologEpilogInserter.cpp (+7-1) 
- (modified) llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll (+29-4) 
- (modified) llvm/test/CodeGen/X86/apx/push2-pop2.ll (+151-24) 
- (modified) llvm/test/CodeGen/X86/apx/pushp-popp.ll (+16-4) 
- (modified) llvm/test/CodeGen/X86/avx512-intel-ocl.ll (+191-8) 
- (modified) llvm/test/CodeGen/X86/clobber_base_ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr2.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/i386-baseptr.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll (+28-2) 
- (modified) llvm/test/CodeGen/X86/x86-32-intrcc.ll (+1-3) 
- (modified) llvm/test/CodeGen/X86/x86-64-baseptr.ll (+182-12) 


``````````diff

diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index ee03eaa8ae527c..da9385ce4c96d4 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,6 +77,11 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
 STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
 
+// Experimental Feature enables spilling and reload FP/BP
+static cl::opt<bool>
+    EnableSpillFPBP("enable-spill-fpbp",
+                    cl::desc("Spill clobbered fp register to stack."),
+                    cl::init(false), cl::Hidden);
 
 namespace {
 
@@ -231,7 +236,8 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   // Spill frame pointer and/or base pointer registers if they are clobbered.
   // It is placed before call frame instruction elimination so it will not mess
   // with stack arguments.
-  TFI->spillFPBP(MF);
+  if (EnableSpillFPBP)
+    TFI->spillFPBP(MF);
 
   // Calculate the MaxCallFrameSize value for the function's frame
   // information. Also eliminates call frame pseudo instructions.
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
index f20c4c1ae27867..b4b205bd292276 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
@@ -2,6 +2,7 @@
 ; Check PUSH2/POP2 is not used for vector registers
 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 -frame-pointer=all  --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
 
 define void @widget(float %arg) nounwind {
 ; CHECK-LABEL: widget:
@@ -43,17 +44,41 @@ define void @widget(float %arg) nounwind {
 ; FRAME-NEXT:    xorl %r8d, %r8d
 ; FRAME-NEXT:    callq *%rsi
 ; FRAME-NEXT:    movss %xmm6, 0
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
 ; FRAME-NEXT:    addq $48, %rsp
 ; FRAME-NEXT:    pop2 %r15, %rsi
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: widget:
+; FRAME-SPILL:       # %bb.0: # %bb
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    push2 %rsi, %r15
+; FRAME-SPILL-NEXT:    subq $48, %rsp
+; FRAME-SPILL-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; FRAME-SPILL-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; FRAME-SPILL-NEXT:    movaps %xmm0, %xmm6
+; FRAME-SPILL-NEXT:    xorl %esi, %esi
+; FRAME-SPILL-NEXT:    xorl %ecx, %ecx
+; FRAME-SPILL-NEXT:    callq *%rsi
+; FRAME-SPILL-NEXT:    xorl %ecx, %ecx
+; FRAME-SPILL-NEXT:    xorl %edx, %edx
+; FRAME-SPILL-NEXT:    xorl %r8d, %r8d
+; FRAME-SPILL-NEXT:    callq *%rsi
+; FRAME-SPILL-NEXT:    movss %xmm6, 0
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; FRAME-SPILL-NEXT:    addq $48, %rsp
+; FRAME-SPILL-NEXT:    pop2 %r15, %rsi
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 bb:
   %call = tail call float null(ptr null)
   %call1 = tail call i32 null(ptr null, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
index f5be484be2b1a6..d6bb1a24aa6b7b 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2,+ppx | FileCheck %s --check-prefix=PPX
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
 
 define void @csr1() nounwind {
 ; CHECK-LABEL: csr1:
@@ -24,14 +25,23 @@ define void @csr1() nounwind {
 ; FRAME:       # %bb.0: # %entry
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr1:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{dirflag},~{fpsr},~{flags}"()
   ret void
@@ -63,15 +73,26 @@ define void @csr2() nounwind {
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    pushq %r15
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %r15
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr2:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    pushq %r15
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    popq %r15
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
   ret void
@@ -103,15 +124,26 @@ define void @csr3() nounwind {
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr3:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    push2 %r14, %r15
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    pop2 %r15, %r14
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{dirflag},~{fpsr},~{flags}"()
   ret void
@@ -148,16 +180,29 @@ define void @csr4() nounwind {
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    pushq %r13
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %r13
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr4:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    push2 %r14, %r15
+; FRAME-SPILL-NEXT:    pushq %r13
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    popq %r13
+; FRAME-SPILL-NEXT:    pop2 %r15, %r14
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{dirflag},~{fpsr},~{flags}"()
   ret void
@@ -194,16 +239,29 @@ define void @csr5() nounwind {
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    push2 %r12, %r13
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    pop2 %r13, %r12
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr5:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    push2 %r14, %r15
+; FRAME-SPILL-NEXT:    push2 %r12, %r13
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    pop2 %r13, %r12
+; FRAME-SPILL-NEXT:    pop2 %r15, %r14
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{dirflag},~{fpsr},~{flags}"()
   ret void
@@ -245,17 +303,32 @@ define void @csr6() nounwind {
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    push2 %r12, %r13
 ; FRAME-NEXT:    pushq %rbx
-; FRAME-NEXT:    pushq %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %rbx
 ; FRAME-NEXT:    pop2 %r13, %r12
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr6:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    push2 %r14, %r15
+; FRAME-SPILL-NEXT:    push2 %r12, %r13
+; FRAME-SPILL-NEXT:    pushq %rbx
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    popq %rbx
+; FRAME-SPILL-NEXT:    pop2 %r13, %r12
+; FRAME-SPILL-NEXT:    pop2 %r15, %r14
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{rbx},~{dirflag},~{fpsr},~{flags}"()
   ret void
@@ -421,6 +494,60 @@ define void @lea_in_epilog(i1 %arg, ptr %arg1, ptr %arg2, i64 %arg3, i64 %arg4,
 ; FRAME-NEXT:    movq $0, (%rax)
 ; FRAME-NEXT:  .LBB6_5: # %bb14
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: lea_in_epilog:
+; FRAME-SPILL:       # %bb.0: # %bb
+; FRAME-SPILL-NEXT:    testb $1, %dil
+; FRAME-SPILL-NEXT:    je .LBB6_5
+; FRAME-SPILL-NEXT:  # %bb.1: # %bb13
+; FRAME-SPILL-NEXT:    pushq %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    push2 %r14, %r15
+; FRAME-SPILL-NEXT:    push2 %r12, %r13
+; FRAME-SPILL-NEXT:    pushq %rbx
+; FRAME-SPILL-NEXT:    subq $24, %rsp
+; FRAME-SPILL-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; FRAME-SPILL-NEXT:    addq 16(%rbp), %r9
+; FRAME-SPILL-NEXT:    movq 48(%rbp), %rbx
+; FRAME-SPILL-NEXT:    addq %r9, %rbx
+; FRAME-SPILL-NEXT:    movq 40(%rbp), %r12
+; FRAME-SPILL-NEXT:    addq %r9, %r12
+; FRAME-SPILL-NEXT:    movq 32(%rbp), %r15
+; FRAME-SPILL-NEXT:    addq %r9, %r15
+; FRAME-SPILL-NEXT:    xorl %r13d, %r13d
+; FRAME-SPILL-NEXT:    xorl %r14d, %r14d
+; FRAME-SPILL-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; FRAME-SPILL-NEXT:    .p2align 4
+; FRAME-SPILL-NEXT:  .LBB6_2: # %bb15
+; FRAME-SPILL-NEXT:    # =>This Inner Loop Header: Depth=1
+; FRAME-SPILL-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; FRAME-SPILL-NEXT:    incq %r14
+; FRAME-SPILL-NEXT:    movl $432, %edx # imm = 0x1B0
+; FRAME-SPILL-NEXT:    xorl %edi, %edi
+; FRAME-SPILL-NEXT:    movq %r12, %rsi
+; FRAME-SPILL-NEXT:    callq memcpy@PLT
+; FRAME-SPILL-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload
+; FRAME-SPILL-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; FRAME-SPILL-NEXT:    movq 16(%rbp), %rax
+; FRAME-SPILL-NEXT:    addq %rax, %rbx
+; FRAME-SPILL-NEXT:    addq %rax, %r12
+; FRAME-SPILL-NEXT:    addq %rax, %r15
+; FRAME-SPILL-NEXT:    addq %rax, %r9
+; FRAME-SPILL-NEXT:    addq $8, %r13
+; FRAME-SPILL-NEXT:    testb $1, %dil
+; FRAME-SPILL-NEXT:    je .LBB6_2
+; FRAME-SPILL-NEXT:  # %bb.3: # %bb11
+; FRAME-SPILL-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; FRAME-SPILL-NEXT:    leaq {{[0-9]+}}(%rsp), %rsp
+; FRAME-SPILL-NEXT:    popq %rbx
+; FRAME-SPILL-NEXT:    pop2 %r13, %r12
+; FRAME-SPILL-NEXT:    pop2 %r15, %r14
+; FRAME-SPILL-NEXT:    popq %rbp
+; FRAME-SPILL-NEXT:    jne .LBB6_5
+; FRAME-SPILL-NEXT:  # %bb.4: # %bb12
+; FRAME-SPILL-NEXT:    movq $0, (%rax)
+; FRAME-SPILL-NEXT:  .LBB6_5: # %bb14
+; FRAME-SPILL-NEXT:    retq
 bb:
   br i1 %arg, label %bb13, label %bb14
 
diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
index 625e70b07198e8..4097c59c56437b 100644
--- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll
+++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx | FileCheck %s --check-prefix=CHECK
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
 
 define void @csr2() nounwind {
 ; CHECK-LABEL: csr2:
@@ -18,15 +19,26 @@ define void @csr2() nounwind {
 ; FRAME-NEXT:    pushp %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    pushp %r15
-; FRAME-NEXT:    pushp %rbp
-; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
-; FRAME-NEXT:    popq %rax
-; FRAME-NEXT:    popp %rbp
 ; FRAME-NEXT:    popp %r15
 ; FRAME-NEXT:    popp %rbp
 ; FRAME-NEXT:    retq
+;
+; FRAME-SPILL-LABEL: csr2:
+; FRAME-SPILL:       # %bb.0: # %entry
+; FRAME-SPILL-NEXT:    pushp %rbp
+; FRAME-SPILL-NEXT:    movq %rsp, %rbp
+; FRAME-SPILL-NEXT:    pushp %r15
+; FRAME-SPILL-NEXT:    pushp %rbp
+; FRAME-SPILL-NEXT:    pushq %rax
+; FRAME-SPILL-NEXT:    #APP
+; FRAME-SPILL-NEXT:    #NO_APP
+; FRAME-SPILL-NEXT:    popq %rax
+; FRAME-SPILL-NEXT:    popp %rbp
+; FRAME-SPILL-NEXT:    popp %r15
+; FRAME-SPILL-NEXT:    popp %rbp
+; FRAME-SPILL-NEXT:    retq
 entry:
   tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
   ret void
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 6c68279b8d04ae..3873b920d4de69 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -6,7 +6,9 @@
 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --enable-spill-fpbp=true | FileCheck %s -check-prefixes=X64-SPILL,X64-SPILL-KNL
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --enable-spill-fpbp=true | FileCheck %s -check-prefixes=X64-SPILL,X64-SPILL-SKX
 
 declare <16 x float> @func_float16_ptr(<16 x float>, ptr)
 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
@@ -70,17 +72,35 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    subq $128, %rsp
 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
 ; X64-NEXT:    movq %rsp, %rdi
-; X64-NEXT:    pushq %rbp
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq _func_float16_ptr
-; X64-NEXT:    addq $8, %rsp
-; X64-NEXT:    popq %rbp
 ; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
 ; X64-NEXT:    leaq -16(%rbp), %rsp
 ; X64-NEXT:    popq %r12
 ; X64-NEXT:    popq %r13
 ; X64-NEXT:    popq %rbp
 ; X64-NEXT:    retq
+;
+; X64-SPILL-LABEL: testf16_inp:
+; X64-SPILL:       ## %bb.0:
+; X64-SPILL-NEXT:    pushq %rbp
+; X64-SPILL-NEXT:    movq %rsp, %rbp
+; X64-SPILL-NEXT:    pushq %r13
+; X64-SPILL-NEXT:    pushq %r12
+; X64-SPILL-NEXT:    andq $-64, %rsp
+; X64-SPILL-NEXT:    subq $128, %rsp
+; X64-SPILL-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-SPILL-NEXT:    movq %rsp, %rdi
+; X64-SPILL-NEXT:    pushq %rbp
+; X64-SPILL-NEXT:    pushq %rax
+; X64-SPILL-NEXT:    callq _func_float16_ptr
+; X64-SPILL-NEXT:    addq $8, %rsp
+; X64-SPILL-NEXT:    popq %rbp
+; X64-SPILL-NEXT:    vaddps (%rsp), %zmm0, %zmm0
+; X64-SPILL-NEXT:    leaq -16(%rbp), %rsp
+; X64-SPILL-NEXT:    popq %r12
+; X64-SPILL-NEXT:    popq %r13
+; X64-SPILL-NEXT:    popq %rbp
+; X64-SPILL-NEXT:    retq
   %y = alloca <16 x float>, align 64
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, ptr %y)
@@ -154,11 +174,7 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    vmovaps %zmm1, %zmm16
 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
 ; X64-NEXT:    movq %rsp, %rdi
-; X64-NEXT:    pushq %rbp
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq _func_float16_ptr
-; X64-NEXT:    addq $8, %rsp
-; X64-NEXT:    popq %rbp
 ; X64-NEXT:    vaddps %zmm16, %zmm0, %zmm0
 ; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
 ; X64-NEXT:    leaq -16(%rbp), %rsp
@@ -166,6 +182,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    popq %r13
 ; X64-NEXT:    popq %rbp
 ; X64-NEXT:    retq
+;
+; X64-SPILL-LABEL: testf16_regs:
+; X64-SPILL:       ## %bb.0:
+; X64-SPILL-NEXT:    pushq %rbp
+; X64-SPILL-NEXT:    movq %rsp, %rbp
+; X64-SPILL-NEXT:    pushq %r13
+; X64-SPILL-NEXT:    pushq %r12
+; X64-SPILL-NEXT:    andq $-64, %rsp
+; X64-SPILL-NEXT:    subq $128, %rsp
+; X64-SPILL-NEXT:    vmovaps %zmm1, %zmm16
+; X64-SPILL-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-SPILL-NEXT:    movq %rsp, %rdi
+; X64-SPILL-NEXT:    pushq %rbp
+; X64-SPILL-NEXT:    pushq %rax
+; X64-SPILL-NEXT:    callq _func_float16_ptr
+; X64-SPILL-NEXT:    addq $8, %rsp
+; X64-SPILL-NEXT:    popq %rbp
+; X64-SPILL-NEXT:    vaddps %zmm16, %zmm0, %zmm0
+; X64-SPILL-NEXT:    vaddps (%rsp), %zmm0, %zmm0
+; X64-SPILL-NEXT:    leaq -16(%rbp), %rsp
+; X64-SPILL-NEXT:    popq %r12
+; X64-SPILL-NEXT:    popq %r13
+; X64-SPILL-NEXT:    popq %rbp
+; X64-SPILL-NEXT:    retq
   %y = alloca <16 x float>, align 64
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, ptr %y)
@@ -348,6 +388,55 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
 ; X64-KNL-NEXT:    popq %rsi
 ; X64-KNL-NEXT:    retq
 ;
+; X64-SPILL-KNL-LABEL: test_prolog_epilog:
+; X64-SPILL-KNL:       ## %bb.0:
+; X64-SPILL-KNL-NEXT:    pushq %rsi
+; X64-SPILL-KNL-NEXT:    subq $1072, %rsp ## imm = 0x430
+; X64-SPILL-KNL-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT:    kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT:    vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byt...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/114791