[llvm] [X86][APX] Fix issue of push2/pop2 instr with stack clash protection (PR #145303)

Feng Zou via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 23 03:11:02 PDT 2025


https://github.com/fzou1 created https://github.com/llvm/llvm-project/pull/145303

When the -stack-clash-protection option is specified and APX push2pop2 is enabled, emitSPUpdate is called twice, and each call emits a STACKALLOC_W_PROBING pseudo instruction. The pseudo instruction for the push2 padding is silently ignored, which leaves the stack misaligned with respect to the 16-byte boundary and causes a GP exception at runtime. Fixed by emitting a "push %rax" instruction directly for the push2 padding instead of calling emitSPUpdate. See https://reviews.llvm.org/D150033 for a similar issue.

>From f6feab605a3378ae8837f83e71305c4dcaabb740 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Thu, 19 Jun 2025 16:55:24 +0800
Subject: [PATCH] [X86][APX] Fix the issue of push2/pop2 instruction with stack
 clash protection

When the -stack-clash-protection option is specified and APX push2pop2 is
enabled, emitSPUpdate is called twice, and each call emits a
STACKALLOC_W_PROBING pseudo instruction. The pseudo instruction for the push2
padding is silently ignored, which leaves the stack misaligned with respect to
the 16-byte boundary and causes a GP exception at runtime. Fixed by emitting a
"push %rax" instruction directly for the push2 padding instead of calling
emitSPUpdate. See https://reviews.llvm.org/D150033 for a similar issue.
---
 llvm/lib/Target/X86/X86FrameLowering.cpp      | 10 ++-
 .../apx/pp2-with-stack-clash-protection.ll    | 69 +++++++++++++++++++
 2 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll

diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 42a09106b3a3a..c96d3c15a8823 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -41,7 +41,7 @@
 STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
 STATISTIC(NumFrameExtraProbe,
           "Number of extra stack probes generated in prologue");
-STATISTIC(NumFunctionUsingPush2Pop2, "Number of funtions using push2/pop2");
+STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
 
 using namespace llvm;
 
@@ -3021,8 +3021,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
   // Push GPRs. It increases frame size.
   const MachineFunction &MF = *MBB.getParent();
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  if (X86FI->padForPush2Pop2())
-    emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
+  if (X86FI->padForPush2Pop2()) {
+    assert(SlotSize == 8 && "Unexpected slot size for padding!");
+    BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
+        .addReg(X86::RAX, RegState::Undef)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Update LiveIn of the basic block and decide whether we can add a kill flag
   // to the use.
diff --git a/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll b/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll
new file mode 100644
index 0000000000000..a7795f0b6bc5d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 -mattr=+push2pop2 | FileCheck %s
+
+
+; This test is to check if "pushq %rax" is emitted correctly for push2pop2
+; padding when it's built with -stack-clash-protection option.
+
+define i32 @foo(ptr %src1, i32 %len, ptr %src2, ptr %dst, i1 %cmp, i32 %sub) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    push2 %r15, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    push2 %r13, %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    push2 %rbx, %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset %rbx, -64
+; CHECK-NEXT:    .cfi_offset %r12, -56
+; CHECK-NEXT:    .cfi_offset %r13, -48
+; CHECK-NEXT:    .cfi_offset %r14, -40
+; CHECK-NEXT:    .cfi_offset %r15, -32
+; CHECK-NEXT:    .cfi_offset %rbp, -24
+; CHECK-NEXT:    movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %r8d, %ebp
+; CHECK-NEXT:    movq %rcx, %r14
+; CHECK-NEXT:    movq %rdx, %r15
+; CHECK-NEXT:    movl %esi, %r12d
+; CHECK-NEXT:    movq %rdi, %r13
+; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %r15, %rdi
+; CHECK-NEXT:    movq %r13, %rsi
+; CHECK-NEXT:    movq %r14, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    movl %r12d, %r8d
+; CHECK-NEXT:    callq *%rbx
+; CHECK-NEXT:    testb $1, %bpl
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %while.end
+; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    pop2 %r12, %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    pop2 %r14, %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pop2 %rbp, %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %call5 = tail call i32 null(ptr %src2, ptr %src1, ptr %dst, i32 0, i32 %len)
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:                                        ; preds = %while.body
+  ret i32 %sub
+}
+
+attributes #0 = { "probe-stack"="inline-asm"}



More information about the llvm-commits mailing list