[llvm] [X86][APX] Fix issue of push2/pop2 instr with stack clash protection (PR #145303)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 23 03:11:31 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Feng Zou (fzou1)

<details>
<summary>Changes</summary>

When the -stack-clash-protection option is specified and APX push2pop2 is enabled, the emitSPUpdate function is called twice, which emits two STACKALLOC_W_PROBING pseudo instructions. The pseudo instruction for the push2 padding is silently ignored, which leaves the stack not aligned to 16 bytes and causes a GP exception at runtime. This is fixed by directly emitting a "push %rax" instruction for the push2 padding instead of calling emitSPUpdate. There was a similar issue in https://reviews.llvm.org/D150033.

---
Full diff: https://github.com/llvm/llvm-project/pull/145303.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86FrameLowering.cpp (+7-3) 
- (added) llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll (+69) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 42a09106b3a3a..c96d3c15a8823 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -41,7 +41,7 @@
 STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
 STATISTIC(NumFrameExtraProbe,
           "Number of extra stack probes generated in prologue");
-STATISTIC(NumFunctionUsingPush2Pop2, "Number of funtions using push2/pop2");
+STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
 
 using namespace llvm;
 
@@ -3021,8 +3021,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
   // Push GPRs. It increases frame size.
   const MachineFunction &MF = *MBB.getParent();
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  if (X86FI->padForPush2Pop2())
-    emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
+  if (X86FI->padForPush2Pop2()) {
+    assert(SlotSize == 8 && "Unexpected slot size for padding!");
+    BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
+        .addReg(X86::RAX, RegState::Undef)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Update LiveIn of the basic block and decide whether we can add a kill flag
   // to the use.
diff --git a/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll b/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll
new file mode 100644
index 0000000000000..a7795f0b6bc5d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 -mattr=+push2pop2 | FileCheck %s
+
+
+; This test is to check if "pushq %rax" is emitted correctly for push2pop2
+; padding when it's built with -stack-clash-protection option.
+
+define i32 @foo(ptr %src1, i32 %len, ptr %src2, ptr %dst, i1 %cmp, i32 %sub) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    push2 %r15, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    push2 %r13, %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    push2 %rbx, %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset %rbx, -64
+; CHECK-NEXT:    .cfi_offset %r12, -56
+; CHECK-NEXT:    .cfi_offset %r13, -48
+; CHECK-NEXT:    .cfi_offset %r14, -40
+; CHECK-NEXT:    .cfi_offset %r15, -32
+; CHECK-NEXT:    .cfi_offset %rbp, -24
+; CHECK-NEXT:    movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %r8d, %ebp
+; CHECK-NEXT:    movq %rcx, %r14
+; CHECK-NEXT:    movq %rdx, %r15
+; CHECK-NEXT:    movl %esi, %r12d
+; CHECK-NEXT:    movq %rdi, %r13
+; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %r15, %rdi
+; CHECK-NEXT:    movq %r13, %rsi
+; CHECK-NEXT:    movq %r14, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    movl %r12d, %r8d
+; CHECK-NEXT:    callq *%rbx
+; CHECK-NEXT:    testb $1, %bpl
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %while.end
+; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    pop2 %r12, %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    pop2 %r14, %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pop2 %rbp, %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %call5 = tail call i32 null(ptr %src2, ptr %src1, ptr %dst, i32 0, i32 %len)
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:                                        ; preds = %while.body
+  ret i32 %sub
+}
+
+attributes #0 = { "probe-stack"="inline-asm"}

``````````

</details>


https://github.com/llvm/llvm-project/pull/145303


More information about the llvm-commits mailing list