[llvm] [X86][APX] Prevent from emitting push2/pop2 if stack alignment<16B (PR #143076)
Feng Zou via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 5 23:19:27 PDT 2025
https://github.com/fzou1 created https://github.com/llvm/llvm-project/pull/143076
push2/pop2 requires 16-byte stack alignment. If the stack alignment is less than that, push2/pop2 should not be emitted.
From 4c71a9845349753b8e8454452c04a4a9b5331424 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Fri, 6 Jun 2025 12:48:50 +0800
Subject: [PATCH] [X86][APX] Prevent from emitting push2/pop2 when stack
alignment < 16 bytes
push2/pop2 requires 16-byte stack alignment. If the stack alignment is
less than that, push2/pop2 should not be emitted.
---
llvm/lib/Target/X86/X86FrameLowering.cpp | 2 +-
...op2-disabled-with-small-stack-alignment.ll | 209 ++++++++++++++++++
2 files changed, 210 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/X86/apx/push2-pop2-disabled-with-small-stack-alignment.ll
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 75f49beee27c6..09b036b5f0c77 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2921,7 +2921,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// 3. When the number of CSR push is even, start to use push2 from the 1st
// push and make the stack 16B aligned before the push
unsigned NumRegsForPush2 = 0;
- if (STI.hasPush2Pop2()) {
+ if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
return X86::GR64RegClass.contains(I.getReg());
});
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-disabled-with-small-stack-alignment.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-disabled-with-small-stack-alignment.ll
new file mode 100644
index 0000000000000..d72c0bbcbcc7d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-disabled-with-small-stack-alignment.ll
@@ -0,0 +1,209 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
+
+; This test checks that no push2/pop2 is emitted when the stack alignment is set
+; to a value less than the 16 bytes required by the push2/pop2 instructions.
+; Here it is set to 8 bytes.
+
+define void @csr1() nounwind {
+; CHECK-LABEL: csr1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rbp},~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+define void @csr2() nounwind {
+; CHECK-LABEL: csr2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+define void @csr3() nounwind {
+; CHECK-LABEL: csr3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+define void @csr4() nounwind {
+; CHECK-LABEL: csr4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+define void @csr5() nounwind {
+; CHECK-LABEL: csr5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+define void @csr6() nounwind {
+; CHECK-LABEL: csr6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{rbx},~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+define void @lea_in_epilog(i1 %arg, ptr %arg1, ptr %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10) nounwind {
+; CHECK-LABEL: lea_in_epilog:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: je .LBB6_5
+; CHECK-NEXT: # %bb.1: # %bb13
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: movq %r9, %r14
+; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %r14
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; CHECK-NEXT: addq %r14, %r13
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; CHECK-NEXT: addq %r14, %r15
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: xorl %r12d, %r12d
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB6_2: # %bb15
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: incq %r12
+; CHECK-NEXT: movl $432, %edx # imm = 0x1B0
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: movq %r15, %rsi
+; CHECK-NEXT: callq memcpy@PLT
+; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: addq %rax, %r13
+; CHECK-NEXT: addq %rax, %r15
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: addq %rax, %r14
+; CHECK-NEXT: addq $8, %rbp
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: je .LBB6_2
+; CHECK-NEXT: # %bb.3: # %bb11
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: jne .LBB6_5
+; CHECK-NEXT: # %bb.4: # %bb12
+; CHECK-NEXT: movq $0, (%rax)
+; CHECK-NEXT: .LBB6_5: # %bb14
+; CHECK-NEXT: retq
+bb:
+ br i1 %arg, label %bb13, label %bb14
+
+bb11:
+ br i1 %arg, label %bb14, label %bb12
+
+bb12:
+ store double 0.000000e+00, ptr %arg1, align 8
+ br label %bb14
+
+bb13:
+ %getelementptr = getelementptr i8, ptr null, i64 %arg5
+ br label %bb15
+
+bb14:
+ ret void
+
+bb15:
+ %phi = phi i64 [ 0, %bb13 ], [ %add, %bb15 ]
+ %getelementptr16 = getelementptr double, ptr null, i64 %phi
+ %add = add i64 %phi, 1
+ %mul = mul i64 %arg6, %add
+ %getelementptr17 = getelementptr i8, ptr %getelementptr, i64 %mul
+ call void @llvm.memcpy.p0.p0.i64(ptr %getelementptr16, ptr %getelementptr17, i64 0, i1 false)
+ %getelementptr18 = getelementptr i8, ptr %getelementptr17, i64 %arg7
+ %getelementptr19 = getelementptr i8, ptr %getelementptr17, i64 %arg8
+ call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %getelementptr19, i64 0, i1 false)
+ %getelementptr20 = getelementptr i8, ptr %getelementptr17, i64 %arg9
+ call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %getelementptr20, i64 432, i1 false)
+ %getelementptr21 = getelementptr i8, ptr %getelementptr17, i64 %arg10
+ call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %getelementptr21, i64 0, i1 false)
+ br i1 %arg, label %bb11, label %bb15
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 8}
More information about the llvm-commits
mailing list