[llvm] [Codegen] Spill/Restore FP/BP under option (PR #114791)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 23:36:31 PST 2024
https://github.com/mahesh-attarde updated https://github.com/llvm/llvm-project/pull/114791
>From 50144c4034f4fa023a1dd6514dfc93e2ce56f38f Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Mon, 4 Nov 2024 05:11:17 -0800
Subject: [PATCH] Spill/Restore FP/BP under option
---
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 8 +-
.../X86/apx/push2-pop2-vector-register.ll | 33 ++-
llvm/test/CodeGen/X86/apx/push2-pop2.ll | 175 ++++++++++++---
llvm/test/CodeGen/X86/apx/pushp-popp.ll | 20 +-
llvm/test/CodeGen/X86/avx512-intel-ocl.ll | 199 +++++++++++++++++-
llvm/test/CodeGen/X86/clobber_base_ptr.ll | 2 +-
llvm/test/CodeGen/X86/clobber_frame_ptr.ll | 2 +-
llvm/test/CodeGen/X86/clobber_frame_ptr2.ll | 3 +-
.../test/CodeGen/X86/clobber_frame_ptr_x32.ll | 2 +-
llvm/test/CodeGen/X86/i386-baseptr.ll | 4 +-
.../X86/inline-asm-function-call-pic.ll | 30 ++-
llvm/test/CodeGen/X86/x86-32-intrcc.ll | 4 +-
llvm/test/CodeGen/X86/x86-64-baseptr.ll | 194 +++++++++++++++--
13 files changed, 612 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index ee03eaa8ae527c..da9385ce4c96d4 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,6 +77,11 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+// Experimental (default off): enables spill/reload of a clobbered FP/BP.
+static cl::opt<bool>
+ EnableSpillFPBP("enable-spill-fpbp",
+ cl::desc("Spill clobbered fp register to stack."),
+ cl::init(false), cl::Hidden);
namespace {
@@ -231,7 +236,8 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
// Spill frame pointer and/or base pointer registers if they are clobbered.
// It is placed before call frame instruction elimination so it will not mess
// with stack arguments.
- TFI->spillFPBP(MF);
+ if (EnableSpillFPBP)
+ TFI->spillFPBP(MF);
// Calculate the MaxCallFrameSize value for the function's frame
// information. Also eliminates call frame pseudo instructions.
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
index f20c4c1ae27867..b4b205bd292276 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
@@ -2,6 +2,7 @@
; Check PUSH2/POP2 is not used for vector registers
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+push2pop2 -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
define void @widget(float %arg) nounwind {
; CHECK-LABEL: widget:
@@ -43,17 +44,41 @@ define void @widget(float %arg) nounwind {
; FRAME-NEXT: xorl %r8d, %r8d
; FRAME-NEXT: callq *%rsi
; FRAME-NEXT: movss %xmm6, 0
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; FRAME-NEXT: addq $48, %rsp
; FRAME-NEXT: pop2 %r15, %rsi
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: widget:
+; FRAME-SPILL: # %bb.0: # %bb
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: push2 %rsi, %r15
+; FRAME-SPILL-NEXT: subq $48, %rsp
+; FRAME-SPILL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; FRAME-SPILL-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; FRAME-SPILL-NEXT: movaps %xmm0, %xmm6
+; FRAME-SPILL-NEXT: xorl %esi, %esi
+; FRAME-SPILL-NEXT: xorl %ecx, %ecx
+; FRAME-SPILL-NEXT: callq *%rsi
+; FRAME-SPILL-NEXT: xorl %ecx, %ecx
+; FRAME-SPILL-NEXT: xorl %edx, %edx
+; FRAME-SPILL-NEXT: xorl %r8d, %r8d
+; FRAME-SPILL-NEXT: callq *%rsi
+; FRAME-SPILL-NEXT: movss %xmm6, 0
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; FRAME-SPILL-NEXT: addq $48, %rsp
+; FRAME-SPILL-NEXT: pop2 %r15, %rsi
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
bb:
%call = tail call float null(ptr null)
%call1 = tail call i32 null(ptr null, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
index f5be484be2b1a6..d6bb1a24aa6b7b 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2,+ppx | FileCheck %s --check-prefix=PPX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
define void @csr1() nounwind {
; CHECK-LABEL: csr1:
@@ -24,14 +25,23 @@ define void @csr1() nounwind {
; FRAME: # %bb.0: # %entry
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr1:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -63,15 +73,26 @@ define void @csr2() nounwind {
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: pushq %r15
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %r15
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr2:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: pushq %r15
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %r15
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -103,15 +124,26 @@ define void @csr3() nounwind {
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr3:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -148,16 +180,29 @@ define void @csr4() nounwind {
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: pushq %r13
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %r13
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr4:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: pushq %r13
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %r13
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -194,16 +239,29 @@ define void @csr5() nounwind {
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: push2 %r12, %r13
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: pop2 %r13, %r12
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr5:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: push2 %r12, %r13
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: pop2 %r13, %r12
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -245,17 +303,32 @@ define void @csr6() nounwind {
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: push2 %r12, %r13
; FRAME-NEXT: pushq %rbx
-; FRAME-NEXT: pushq %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %rbx
; FRAME-NEXT: pop2 %r13, %r12
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr6:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: push2 %r12, %r13
+; FRAME-SPILL-NEXT: pushq %rbx
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: popq %rbx
+; FRAME-SPILL-NEXT: pop2 %r13, %r12
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{rbx},~{dirflag},~{fpsr},~{flags}"()
ret void
@@ -421,6 +494,60 @@ define void @lea_in_epilog(i1 %arg, ptr %arg1, ptr %arg2, i64 %arg3, i64 %arg4,
; FRAME-NEXT: movq $0, (%rax)
; FRAME-NEXT: .LBB6_5: # %bb14
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: lea_in_epilog:
+; FRAME-SPILL: # %bb.0: # %bb
+; FRAME-SPILL-NEXT: testb $1, %dil
+; FRAME-SPILL-NEXT: je .LBB6_5
+; FRAME-SPILL-NEXT: # %bb.1: # %bb13
+; FRAME-SPILL-NEXT: pushq %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: push2 %r14, %r15
+; FRAME-SPILL-NEXT: push2 %r12, %r13
+; FRAME-SPILL-NEXT: pushq %rbx
+; FRAME-SPILL-NEXT: subq $24, %rsp
+; FRAME-SPILL-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; FRAME-SPILL-NEXT: addq 16(%rbp), %r9
+; FRAME-SPILL-NEXT: movq 48(%rbp), %rbx
+; FRAME-SPILL-NEXT: addq %r9, %rbx
+; FRAME-SPILL-NEXT: movq 40(%rbp), %r12
+; FRAME-SPILL-NEXT: addq %r9, %r12
+; FRAME-SPILL-NEXT: movq 32(%rbp), %r15
+; FRAME-SPILL-NEXT: addq %r9, %r15
+; FRAME-SPILL-NEXT: xorl %r13d, %r13d
+; FRAME-SPILL-NEXT: xorl %r14d, %r14d
+; FRAME-SPILL-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; FRAME-SPILL-NEXT: .p2align 4
+; FRAME-SPILL-NEXT: .LBB6_2: # %bb15
+; FRAME-SPILL-NEXT: # =>This Inner Loop Header: Depth=1
+; FRAME-SPILL-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; FRAME-SPILL-NEXT: incq %r14
+; FRAME-SPILL-NEXT: movl $432, %edx # imm = 0x1B0
+; FRAME-SPILL-NEXT: xorl %edi, %edi
+; FRAME-SPILL-NEXT: movq %r12, %rsi
+; FRAME-SPILL-NEXT: callq memcpy@PLT
+; FRAME-SPILL-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload
+; FRAME-SPILL-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; FRAME-SPILL-NEXT: movq 16(%rbp), %rax
+; FRAME-SPILL-NEXT: addq %rax, %rbx
+; FRAME-SPILL-NEXT: addq %rax, %r12
+; FRAME-SPILL-NEXT: addq %rax, %r15
+; FRAME-SPILL-NEXT: addq %rax, %r9
+; FRAME-SPILL-NEXT: addq $8, %r13
+; FRAME-SPILL-NEXT: testb $1, %dil
+; FRAME-SPILL-NEXT: je .LBB6_2
+; FRAME-SPILL-NEXT: # %bb.3: # %bb11
+; FRAME-SPILL-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; FRAME-SPILL-NEXT: leaq {{[0-9]+}}(%rsp), %rsp
+; FRAME-SPILL-NEXT: popq %rbx
+; FRAME-SPILL-NEXT: pop2 %r13, %r12
+; FRAME-SPILL-NEXT: pop2 %r15, %r14
+; FRAME-SPILL-NEXT: popq %rbp
+; FRAME-SPILL-NEXT: jne .LBB6_5
+; FRAME-SPILL-NEXT: # %bb.4: # %bb12
+; FRAME-SPILL-NEXT: movq $0, (%rax)
+; FRAME-SPILL-NEXT: .LBB6_5: # %bb14
+; FRAME-SPILL-NEXT: retq
bb:
br i1 %arg, label %bb13, label %bb14
diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
index 625e70b07198e8..4097c59c56437b 100644
--- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll
+++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx -frame-pointer=all | FileCheck %s --check-prefix=FRAME
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ppx -frame-pointer=all --enable-spill-fpbp=true | FileCheck %s --check-prefix=FRAME-SPILL
define void @csr2() nounwind {
; CHECK-LABEL: csr2:
@@ -18,15 +19,26 @@ define void @csr2() nounwind {
; FRAME-NEXT: pushp %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: pushp %r15
-; FRAME-NEXT: pushp %rbp
-; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
-; FRAME-NEXT: popq %rax
-; FRAME-NEXT: popp %rbp
; FRAME-NEXT: popp %r15
; FRAME-NEXT: popp %rbp
; FRAME-NEXT: retq
+;
+; FRAME-SPILL-LABEL: csr2:
+; FRAME-SPILL: # %bb.0: # %entry
+; FRAME-SPILL-NEXT: pushp %rbp
+; FRAME-SPILL-NEXT: movq %rsp, %rbp
+; FRAME-SPILL-NEXT: pushp %r15
+; FRAME-SPILL-NEXT: pushp %rbp
+; FRAME-SPILL-NEXT: pushq %rax
+; FRAME-SPILL-NEXT: #APP
+; FRAME-SPILL-NEXT: #NO_APP
+; FRAME-SPILL-NEXT: popq %rax
+; FRAME-SPILL-NEXT: popp %rbp
+; FRAME-SPILL-NEXT: popp %r15
+; FRAME-SPILL-NEXT: popp %rbp
+; FRAME-SPILL-NEXT: retq
entry:
tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
ret void
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 6c68279b8d04ae..3873b920d4de69 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -6,7 +6,9 @@
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --enable-spill-fpbp=true | FileCheck %s -check-prefixes=X64-SPILL,X64-SPILL-KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --enable-spill-fpbp=true | FileCheck %s -check-prefixes=X64-SPILL,X64-SPILL-SKX
declare <16 x float> @func_float16_ptr(<16 x float>, ptr)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
@@ -70,17 +72,35 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X64-NEXT: movq %rsp, %rdi
-; X64-NEXT: pushq %rbp
-; X64-NEXT: pushq %rax
; X64-NEXT: callq _func_float16_ptr
-; X64-NEXT: addq $8, %rsp
-; X64-NEXT: popq %rbp
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %rbp
; X64-NEXT: retq
+;
+; X64-SPILL-LABEL: testf16_inp:
+; X64-SPILL: ## %bb.0:
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: movq %rsp, %rbp
+; X64-SPILL-NEXT: pushq %r13
+; X64-SPILL-NEXT: pushq %r12
+; X64-SPILL-NEXT: andq $-64, %rsp
+; X64-SPILL-NEXT: subq $128, %rsp
+; X64-SPILL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-SPILL-NEXT: movq %rsp, %rdi
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: pushq %rax
+; X64-SPILL-NEXT: callq _func_float16_ptr
+; X64-SPILL-NEXT: addq $8, %rsp
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: vaddps (%rsp), %zmm0, %zmm0
+; X64-SPILL-NEXT: leaq -16(%rbp), %rsp
+; X64-SPILL-NEXT: popq %r12
+; X64-SPILL-NEXT: popq %r13
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: retq
%y = alloca <16 x float>, align 64
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, ptr %y)
@@ -154,11 +174,7 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: vmovaps %zmm1, %zmm16
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X64-NEXT: movq %rsp, %rdi
-; X64-NEXT: pushq %rbp
-; X64-NEXT: pushq %rax
; X64-NEXT: callq _func_float16_ptr
-; X64-NEXT: addq $8, %rsp
-; X64-NEXT: popq %rbp
; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
@@ -166,6 +182,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: popq %r13
; X64-NEXT: popq %rbp
; X64-NEXT: retq
+;
+; X64-SPILL-LABEL: testf16_regs:
+; X64-SPILL: ## %bb.0:
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: movq %rsp, %rbp
+; X64-SPILL-NEXT: pushq %r13
+; X64-SPILL-NEXT: pushq %r12
+; X64-SPILL-NEXT: andq $-64, %rsp
+; X64-SPILL-NEXT: subq $128, %rsp
+; X64-SPILL-NEXT: vmovaps %zmm1, %zmm16
+; X64-SPILL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-SPILL-NEXT: movq %rsp, %rdi
+; X64-SPILL-NEXT: pushq %rbp
+; X64-SPILL-NEXT: pushq %rax
+; X64-SPILL-NEXT: callq _func_float16_ptr
+; X64-SPILL-NEXT: addq $8, %rsp
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: vaddps %zmm16, %zmm0, %zmm0
+; X64-SPILL-NEXT: vaddps (%rsp), %zmm0, %zmm0
+; X64-SPILL-NEXT: leaq -16(%rbp), %rsp
+; X64-SPILL-NEXT: popq %r12
+; X64-SPILL-NEXT: popq %r13
+; X64-SPILL-NEXT: popq %rbp
+; X64-SPILL-NEXT: retq
%y = alloca <16 x float>, align 64
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, ptr %y)
@@ -348,6 +388,55 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; X64-KNL-NEXT: popq %rsi
; X64-KNL-NEXT: retq
;
+; X64-SPILL-KNL-LABEL: test_prolog_epilog:
+; X64-SPILL-KNL: ## %bb.0:
+; X64-SPILL-KNL-NEXT: pushq %rsi
+; X64-SPILL-KNL-NEXT: subq $1072, %rsp ## imm = 0x430
+; X64-SPILL-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
+; X64-SPILL-KNL-NEXT: callq _func_float16
+; X64-SPILL-KNL-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
+; X64-SPILL-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
+; X64-SPILL-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
+; X64-SPILL-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
+; X64-SPILL-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
+; X64-SPILL-KNL-NEXT: addq $1072, %rsp ## imm = 0x430
+; X64-SPILL-KNL-NEXT: popq %rsi
+; X64-SPILL-KNL-NEXT: retq
+;
; X64-SKX-LABEL: test_prolog_epilog:
; X64-SKX: ## %bb.0:
; X64-SKX-NEXT: pushq %rsi
@@ -396,6 +485,55 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; X64-SKX-NEXT: addq $1072, %rsp ## imm = 0x430
; X64-SKX-NEXT: popq %rsi
; X64-SKX-NEXT: retq
+;
+; X64-SPILL-SKX-LABEL: test_prolog_epilog:
+; X64-SPILL-SKX: ## %bb.0:
+; X64-SPILL-SKX-NEXT: pushq %rsi
+; X64-SPILL-SKX-NEXT: subq $1072, %rsp ## imm = 0x430
+; X64-SPILL-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-SPILL-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-SPILL-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-SPILL-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
+; X64-SPILL-SKX-NEXT: callq _func_float16
+; X64-SPILL-SKX-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
+; X64-SPILL-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 8-byte Reload
+; X64-SPILL-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
+; X64-SPILL-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
+; X64-SPILL-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
+; X64-SPILL-SKX-NEXT: addq $1072, %rsp ## imm = 0x430
+; X64-SPILL-SKX-NEXT: popq %rsi
+; X64-SPILL-SKX-NEXT: retq
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
}
@@ -465,6 +603,24 @@ define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask) {
; X64-KNL-NEXT: popq %rbp
; X64-KNL-NEXT: retq
;
+; X64-SPILL-KNL-LABEL: testf16_inp_mask:
+; X64-SPILL-KNL: ## %bb.0:
+; X64-SPILL-KNL-NEXT: pushq %rbp
+; X64-SPILL-KNL-NEXT: .cfi_def_cfa_offset 16
+; X64-SPILL-KNL-NEXT: pushq %r13
+; X64-SPILL-KNL-NEXT: .cfi_def_cfa_offset 24
+; X64-SPILL-KNL-NEXT: pushq %r12
+; X64-SPILL-KNL-NEXT: .cfi_def_cfa_offset 32
+; X64-SPILL-KNL-NEXT: .cfi_offset %r12, -32
+; X64-SPILL-KNL-NEXT: .cfi_offset %r13, -24
+; X64-SPILL-KNL-NEXT: .cfi_offset %rbp, -16
+; X64-SPILL-KNL-NEXT: kmovw %edi, %k1
+; X64-SPILL-KNL-NEXT: callq _func_float16_mask
+; X64-SPILL-KNL-NEXT: popq %r12
+; X64-SPILL-KNL-NEXT: popq %r13
+; X64-SPILL-KNL-NEXT: popq %rbp
+; X64-SPILL-KNL-NEXT: retq
+;
; X64-SKX-LABEL: testf16_inp_mask:
; X64-SKX: ## %bb.0:
; X64-SKX-NEXT: pushq %rbp
@@ -482,6 +638,24 @@ define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask) {
; X64-SKX-NEXT: popq %r13
; X64-SKX-NEXT: popq %rbp
; X64-SKX-NEXT: retq
+;
+; X64-SPILL-SKX-LABEL: testf16_inp_mask:
+; X64-SPILL-SKX: ## %bb.0:
+; X64-SPILL-SKX-NEXT: pushq %rbp
+; X64-SPILL-SKX-NEXT: .cfi_def_cfa_offset 16
+; X64-SPILL-SKX-NEXT: pushq %r13
+; X64-SPILL-SKX-NEXT: .cfi_def_cfa_offset 24
+; X64-SPILL-SKX-NEXT: pushq %r12
+; X64-SPILL-SKX-NEXT: .cfi_def_cfa_offset 32
+; X64-SPILL-SKX-NEXT: .cfi_offset %r12, -32
+; X64-SPILL-SKX-NEXT: .cfi_offset %r13, -24
+; X64-SPILL-SKX-NEXT: .cfi_offset %rbp, -16
+; X64-SPILL-SKX-NEXT: kmovd %edi, %k1
+; X64-SPILL-SKX-NEXT: callq _func_float16_mask
+; X64-SPILL-SKX-NEXT: popq %r12
+; X64-SPILL-SKX-NEXT: popq %r13
+; X64-SPILL-SKX-NEXT: popq %rbp
+; X64-SPILL-SKX-NEXT: retq
%imask = bitcast i16 %mask to <16 x i1>
%1 = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1> %imask)
ret <16 x float> %1
@@ -521,6 +695,15 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog_with_mask(<16 x float> %a
; X64-NEXT: callq _func_float16_mask
; X64-NEXT: popq %rax
; X64-NEXT: retq
+;
+; X64-SPILL-LABEL: test_prolog_epilog_with_mask:
+; X64-SPILL: ## %bb.0:
+; X64-SPILL-NEXT: pushq %rax
+; X64-SPILL-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
+; X64-SPILL-NEXT: kxorw %k1, %k0, %k1
+; X64-SPILL-NEXT: callq _func_float16_mask
+; X64-SPILL-NEXT: popq %rax
+; X64-SPILL-NEXT: retq
%cmp_res = icmp eq <16 x i32>%x1, %x2
%mask1 = xor <16 x i1> %cmp_res, %mask
%c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1)
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
index 2c39560f02d160..343487b609d907 100644
--- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s | FileCheck %s
+; RUN: llc --enable-spill-fpbp=true < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-gnu"
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
index f6b38839d13cc2..6403fac5a52412 100644
--- a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign --enable-spill-fpbp=true -verify-machineinstrs < %s | FileCheck %s
; Calling convention ghccc uses ebp to pass parameter, so calling a function
; using ghccc clobbers ebp. We should save and restore ebp around such a call
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr2.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr2.ll
index 0551152a0718d7..51d02d1b9c1e23 100644
--- a/llvm/test/CodeGen/X86/clobber_frame_ptr2.ll
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr2.ll
@@ -1,4 +1,5 @@
-; RUN: not llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: not llc -mtriple=x86_64-pc-linux -stackrealign --enable-spill-fpbp=true -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
declare cc 11 i64 @hipe2(i64, i64, i64, i64, i64, i64, i64)
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
index 25c951d8b1a109..b4b39d91dc5c93 100644
--- a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s | FileCheck %s
+; RUN: llc --enable-spill-fpbp=true < %s | FileCheck %s
target triple = "x86_64-linux-gnux32"
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
index 777eb838b84cc7..a4a5a919560e5a 100644
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i386-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=i386-pc-none-elf -stackrealign -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=i386-pc-linux -stackrealign --enable-spill-fpbp=true -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=i386-pc-none-elf -stackrealign --enable-spill-fpbp=true -verify-machineinstrs < %s | FileCheck %s
declare i32 @helper() nounwind
define void @base() #0 {
diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
index d3ca872509ad5a..c1d611e6810694 100644
--- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O2 --relocation-model=pic -mtriple=i386-unknown-linux-gnu < %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O2 --relocation-model=pic -mtriple=i386-unknown-linux-gnu --enable-spill-fpbp=true < %s 2>&1 | FileCheck %s
; List the source code:
; // clang -m32 -fasm-blocks -S t.c -O2 -fpic -emit-llvm
@@ -31,7 +32,14 @@
define void @func() local_unnamed_addr #0 {
; CHECK-LABEL: func:
-; CHECK: calll .L0$pb
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: subl $12, %esp
+; CHECK-NEXT: calll .L0$pb
; CHECK-NEXT: .L0$pb:
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: .Ltmp0:
@@ -56,6 +64,12 @@ define void @func() local_unnamed_addr #0 {
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
entry:
%call = tail call i32 @static_func()
;; We test call, CALL, and jmp.
@@ -66,6 +80,18 @@ entry:
declare i32 @extern_func(...) #0
define internal i32 @static_func() #0 {
+; CHECK-LABEL: static_func:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: calll .L1$pb
+; CHECK-NEXT: .L1$pb:
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %eax
+; CHECK-NEXT: movl GV@GOT(%eax), %ecx
+; CHECK-NEXT: movl (%ecx), %eax
+; CHECK-NEXT: leal 1(%eax), %edx
+; CHECK-NEXT: movl %edx, (%ecx)
+; CHECK-NEXT: retl
entry:
%0 = load i32, ptr @GV, align 4
%inc = add nsw i32 %0, 1
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index a0f937e2c323b6..459b659aa71fed 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 2
; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s
-; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+; RUN: llc -mtriple=i686-unknown-unknown -O0 --enable-spill-fpbp=true < %s | FileCheck %s -check-prefix=CHECK0
%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }
@@ -108,10 +108,8 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
-; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: popl %ebp
; CHECK-NEXT: leal -12(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
index 020004def6e7ad..cb3d9580db51e3 100644
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -stackrealign -verify-machineinstrs --enable-spill-fpbp=true < %s | FileCheck -check-prefix=X32ABI-SPILL %s
; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -stackrealign -verify-machineinstrs < %s | FileCheck -check-prefix=X32ABI %s
; This should run with NaCl as well ( -mtriple=x86_64-pc-nacl ) but currently doesn't due to PR22655
@@ -40,6 +41,34 @@ define void @base() #0 {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
;
+; X32ABI-SPILL-LABEL: base:
+; X32ABI-SPILL: # %bb.0: # %entry
+; X32ABI-SPILL-NEXT: pushq %rbp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa_offset 16
+; X32ABI-SPILL-NEXT: .cfi_offset %rbp, -16
+; X32ABI-SPILL-NEXT: movl %esp, %ebp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa_register %rbp
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: andl $-32, %esp
+; X32ABI-SPILL-NEXT: subl $32, %esp
+; X32ABI-SPILL-NEXT: movl %esp, %ebx
+; X32ABI-SPILL-NEXT: .cfi_offset %rbx, -24
+; X32ABI-SPILL-NEXT: callq helper@PLT
+; X32ABI-SPILL-NEXT: # kill: def $eax killed $eax def $rax
+; X32ABI-SPILL-NEXT: leal 31(,%rax,4), %eax
+; X32ABI-SPILL-NEXT: andl $-32, %eax
+; X32ABI-SPILL-NEXT: movl %esp, %ecx
+; X32ABI-SPILL-NEXT: movl %ecx, %edx
+; X32ABI-SPILL-NEXT: subl %eax, %edx
+; X32ABI-SPILL-NEXT: negl %eax
+; X32ABI-SPILL-NEXT: movl %edx, %esp
+; X32ABI-SPILL-NEXT: movl $0, (%ecx,%eax)
+; X32ABI-SPILL-NEXT: leal -8(%ebp), %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: popq %rbp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa %rsp, 8
+; X32ABI-SPILL-NEXT: retq
+;
; X32ABI-LABEL: base:
; X32ABI: # %bb.0: # %entry
; X32ABI-NEXT: pushq %rbp
@@ -115,6 +144,46 @@ define void @clobber_base() #0 {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
;
+; X32ABI-SPILL-LABEL: clobber_base:
+; X32ABI-SPILL: # %bb.0: # %entry
+; X32ABI-SPILL-NEXT: pushq %rbp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa_offset 16
+; X32ABI-SPILL-NEXT: .cfi_offset %rbp, -16
+; X32ABI-SPILL-NEXT: movl %esp, %ebp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa_register %rbp
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: andl $-128, %esp
+; X32ABI-SPILL-NEXT: subl $128, %esp
+; X32ABI-SPILL-NEXT: movl %esp, %ebx
+; X32ABI-SPILL-NEXT: .cfi_offset %rbx, -24
+; X32ABI-SPILL-NEXT: callq helper@PLT
+; X32ABI-SPILL-NEXT: # kill: def $eax killed $eax def $rax
+; X32ABI-SPILL-NEXT: leal 31(,%rax,4), %eax
+; X32ABI-SPILL-NEXT: andl $-32, %eax
+; X32ABI-SPILL-NEXT: movl %esp, %ecx
+; X32ABI-SPILL-NEXT: movl %ecx, %edx
+; X32ABI-SPILL-NEXT: subl %eax, %edx
+; X32ABI-SPILL-NEXT: negl %eax
+; X32ABI-SPILL-NEXT: movl %edx, %esp
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: subl $24, %esp
+; X32ABI-SPILL-NEXT: movl $405, %ebx # imm = 0x195
+; X32ABI-SPILL-NEXT: #APP
+; X32ABI-SPILL-NEXT: nop
+; X32ABI-SPILL-NEXT: #NO_APP
+; X32ABI-SPILL-NEXT: addl $24, %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: movl $8, %edx
+; X32ABI-SPILL-NEXT: #APP
+; X32ABI-SPILL-NEXT: movl %edx, (%ebx)
+; X32ABI-SPILL-NEXT: #NO_APP
+; X32ABI-SPILL-NEXT: movl $0, (%ecx,%eax)
+; X32ABI-SPILL-NEXT: leal -8(%ebp), %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: popq %rbp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa %rsp, 8
+; X32ABI-SPILL-NEXT: retq
+;
; X32ABI-LABEL: clobber_base:
; X32ABI: # %bb.0: # %entry
; X32ABI-NEXT: pushq %rbp
@@ -136,14 +205,10 @@ define void @clobber_base() #0 {
; X32ABI-NEXT: subl %eax, %edx
; X32ABI-NEXT: negl %eax
; X32ABI-NEXT: movl %edx, %esp
-; X32ABI-NEXT: pushq %rbx
-; X32ABI-NEXT: subl $24, %esp
; X32ABI-NEXT: movl $405, %ebx # imm = 0x195
; X32ABI-NEXT: #APP
; X32ABI-NEXT: nop
; X32ABI-NEXT: #NO_APP
-; X32ABI-NEXT: addl $24, %esp
-; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: movl $8, %edx
; X32ABI-NEXT: #APP
; X32ABI-NEXT: movl %edx, (%ebx)
@@ -234,6 +299,74 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
;
+; X32ABI-SPILL-LABEL: clobber_baseptr_argptr:
+; X32ABI-SPILL: # %bb.0: # %entry
+; X32ABI-SPILL-NEXT: pushq %rbp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa_offset 16
+; X32ABI-SPILL-NEXT: .cfi_offset %rbp, -16
+; X32ABI-SPILL-NEXT: movl %esp, %ebp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa_register %rbp
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: andl $-128, %esp
+; X32ABI-SPILL-NEXT: subl $256, %esp # imm = 0x100
+; X32ABI-SPILL-NEXT: movaps %xmm15, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm14, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm13, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm12, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm11, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm10, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm9, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movaps %xmm8, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32ABI-SPILL-NEXT: movl %esp, %ebx
+; X32ABI-SPILL-NEXT: .cfi_offset %rbx, -24
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm8, -160
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm9, -144
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm10, -128
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm11, -112
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm12, -96
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm13, -80
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm14, -64
+; X32ABI-SPILL-NEXT: .cfi_offset %xmm15, -48
+; X32ABI-SPILL-NEXT: movl 16(%ebp), %r14d
+; X32ABI-SPILL-NEXT: callq helper@PLT
+; X32ABI-SPILL-NEXT: # kill: def $eax killed $eax def $rax
+; X32ABI-SPILL-NEXT: leal 31(,%rax,4), %eax
+; X32ABI-SPILL-NEXT: andl $-32, %eax
+; X32ABI-SPILL-NEXT: movl %esp, %ecx
+; X32ABI-SPILL-NEXT: movl %ecx, %edx
+; X32ABI-SPILL-NEXT: subl %eax, %edx
+; X32ABI-SPILL-NEXT: negl %eax
+; X32ABI-SPILL-NEXT: movl %edx, %esp
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: subl $24, %esp
+; X32ABI-SPILL-NEXT: movl $405, %ebx # imm = 0x195
+; X32ABI-SPILL-NEXT: #APP
+; X32ABI-SPILL-NEXT: nop
+; X32ABI-SPILL-NEXT: #NO_APP
+; X32ABI-SPILL-NEXT: #APP
+; X32ABI-SPILL-NEXT: nop
+; X32ABI-SPILL-NEXT: #NO_APP
+; X32ABI-SPILL-NEXT: addl $24, %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: movl $8, %edx
+; X32ABI-SPILL-NEXT: #APP
+; X32ABI-SPILL-NEXT: movl %edx, (%ebx)
+; X32ABI-SPILL-NEXT: #NO_APP
+; X32ABI-SPILL-NEXT: movl %r14d, (%ecx,%eax)
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm8 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm9 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm10 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm11 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm12 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm13 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm14 # 16-byte Reload
+; X32ABI-SPILL-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm15 # 16-byte Reload
+; X32ABI-SPILL-NEXT: leal -8(%ebp), %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: popq %rbp
+; X32ABI-SPILL-NEXT: .cfi_def_cfa %rsp, 8
+; X32ABI-SPILL-NEXT: retq
+;
; X32ABI-LABEL: clobber_baseptr_argptr:
; X32ABI: # %bb.0: # %entry
; X32ABI-NEXT: pushq %rbp
@@ -272,8 +405,6 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
; X32ABI-NEXT: subl %eax, %edx
; X32ABI-NEXT: negl %eax
; X32ABI-NEXT: movl %edx, %esp
-; X32ABI-NEXT: pushq %rbx
-; X32ABI-NEXT: subl $24, %esp
; X32ABI-NEXT: movl $405, %ebx # imm = 0x195
; X32ABI-NEXT: #APP
; X32ABI-NEXT: nop
@@ -281,8 +412,6 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
; X32ABI-NEXT: #APP
; X32ABI-NEXT: nop
; X32ABI-NEXT: #NO_APP
-; X32ABI-NEXT: addl $24, %esp
-; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: movl $8, %edx
; X32ABI-NEXT: #APP
; X32ABI-NEXT: movl %edx, (%ebx)
@@ -361,6 +490,51 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind {
; CHECK-NEXT: leaq -8(%r10), %rsp
; CHECK-NEXT: retq
;
+; X32ABI-SPILL-LABEL: vmw_host_printf:
+; X32ABI-SPILL: # %bb.0: # %entry
+; X32ABI-SPILL-NEXT: pushq %rbp
+; X32ABI-SPILL-NEXT: movl %esp, %ebp
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: andl $-16, %esp
+; X32ABI-SPILL-NEXT: subl $208, %esp
+; X32ABI-SPILL-NEXT: movl %esp, %ebx
+; X32ABI-SPILL-NEXT: movq %rsi, 24(%ebx)
+; X32ABI-SPILL-NEXT: movq %rdx, 32(%ebx)
+; X32ABI-SPILL-NEXT: movq %rcx, 40(%ebx)
+; X32ABI-SPILL-NEXT: movq %r8, 48(%ebx)
+; X32ABI-SPILL-NEXT: movq %r9, 56(%ebx)
+; X32ABI-SPILL-NEXT: testb %al, %al
+; X32ABI-SPILL-NEXT: je .LBB3_2
+; X32ABI-SPILL-NEXT: # %bb.1: # %entry
+; X32ABI-SPILL-NEXT: movaps %xmm0, 64(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm1, 80(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm2, 96(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm3, 112(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm4, 128(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm5, 144(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm6, 160(%ebx)
+; X32ABI-SPILL-NEXT: movaps %xmm7, 176(%ebx)
+; X32ABI-SPILL-NEXT: .LBB3_2: # %entry
+; X32ABI-SPILL-NEXT: leal 16(%rbx), %eax
+; X32ABI-SPILL-NEXT: movl %eax, (%eax)
+; X32ABI-SPILL-NEXT: leal 16(%rbp), %eax
+; X32ABI-SPILL-NEXT: movl %eax, (%eax)
+; X32ABI-SPILL-NEXT: movl $48, (%eax)
+; X32ABI-SPILL-NEXT: movl $8, (%eax)
+; X32ABI-SPILL-NEXT: xorl %eax, %eax
+; X32ABI-SPILL-NEXT: pushq %rbx
+; X32ABI-SPILL-NEXT: subl $24, %esp
+; X32ABI-SPILL-NEXT: xorl %ebx, %ebx
+; X32ABI-SPILL-NEXT: xorl %ecx, %ecx
+; X32ABI-SPILL-NEXT: #APP
+; X32ABI-SPILL-NEXT: #NO_APP
+; X32ABI-SPILL-NEXT: addl $24, %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: leal -8(%ebp), %esp
+; X32ABI-SPILL-NEXT: popq %rbx
+; X32ABI-SPILL-NEXT: popq %rbp
+; X32ABI-SPILL-NEXT: retq
+;
; X32ABI-LABEL: vmw_host_printf:
; X32ABI: # %bb.0: # %entry
; X32ABI-NEXT: pushq %rbp
@@ -393,14 +567,10 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind {
; X32ABI-NEXT: movl $48, (%eax)
; X32ABI-NEXT: movl $8, (%eax)
; X32ABI-NEXT: xorl %eax, %eax
-; X32ABI-NEXT: pushq %rbx
-; X32ABI-NEXT: subl $24, %esp
; X32ABI-NEXT: xorl %ebx, %ebx
; X32ABI-NEXT: xorl %ecx, %ecx
; X32ABI-NEXT: #APP
; X32ABI-NEXT: #NO_APP
-; X32ABI-NEXT: addl $24, %esp
-; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: leal -8(%ebp), %esp
; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: popq %rbp
More information about the llvm-commits
mailing list