[llvm-branch-commits] [llvm] release/22.x v2: [CodeGen][RISCV] Inline stack probes immediately after allocateStack in eliminateCallFramePseudoInstr (#195456) (PR #202882)
Rong Mantle Bao via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jun 10 01:09:55 PDT 2026
https://github.com/CSharperMantle created https://github.com/llvm/llvm-project/pull/202882
Backport 589faedadf141e5e63f7a1e92a0327fc9bdc9b09 to release/22.x.
Supersedes #202821.
>From cee06e47014e8d74bd7918229a3f65c5c97b84d3 Mon Sep 17 00:00:00 2001
From: "Rong \"Mantle\" Bao" <rong.bao at csmantle.top>
Date: Fri, 8 May 2026 08:14:18 +0800
Subject: [PATCH] Inline stack probes immediately after `allocateStack` in
`eliminateCallFramePseudoInstr` (#195456)
[ Upstream commit 589faedadf141e5e63f7a1e92a0327fc9bdc9b09 ]
Compared with the upstream commit, the probing loops in the test's expected
output use `blt` instead of `bltu`, because commit
f162be248636046a20e71209e139347e084b637a has not been applied to release/22.x yet.
Link: https://github.com/llvm/llvm-project/pull/192485 ("[RISCV] Use
unsigned comparison for stack clash probing loop")
---
This PR adds a call to `inlineStackProbe` immediately after
`allocateStack` in `eliminateCallFramePseudoInstr`. This allows stack probe
pseudo-instructions to be expanded in non-entry basic blocks (BBs).
Fixes #195454.
---
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 1 +
.../RISCV/stack-probing-dynamic-nonentry.ll | 115 ++++++++++++++++++
2 files changed, 116 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 8246623e8e5aa..20b43538d69c4 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1898,6 +1898,7 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
needsDwarfCFI(MF) && !hasFP(MF),
/*NeedProbe=*/true, ProbeSize, DynAllocation,
MachineInstr::NoFlags);
+ inlineStackProbe(MF, MBB);
} else {
const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
diff --git a/llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll b/llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll
new file mode 100644
index 0000000000000..4c8bb653b4cff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s -check-prefix=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s | FileCheck %s -check-prefix=RV32
+
+; Test that very large outgoing call frames in functions with variable-sized
+; objects get proper stack probing. The outgoing args are large enough to force
+; the PROBED_STACKALLOC path, which must be expanded in a non-entry block.
+
+define void @f(i64 %n) #0 {
+; RV64-LABEL: f:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd zero, 0(sp)
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 16
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: addi a0, a0, 15
+; RV64-NEXT: andi a0, a0, -16
+; RV64-NEXT: sub a0, sp, a0
+; RV64-NEXT: lui a1, 1
+; RV64-NEXT: .LBB0_1: # %entry
+; RV64-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: sd zero, 0(sp)
+; RV64-NEXT: blt a0, sp, .LBB0_1
+; RV64-NEXT: # %bb.2: # %entry
+; RV64-NEXT: mv sp, a0
+; RV64-NEXT: lui a1, 5
+; RV64-NEXT: sub t1, sp, a1
+; RV64-NEXT: lui t2, 1
+; RV64-NEXT: .LBB0_3: # %entry
+; RV64-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NEXT: sub sp, sp, t2
+; RV64-NEXT: sd zero, 0(sp)
+; RV64-NEXT: bne sp, t1, .LBB0_3
+; RV64-NEXT: # %bb.4: # %entry
+; RV64-NEXT: addi sp, sp, -2048
+; RV64-NEXT: addi sp, sp, -1424
+; RV64-NEXT: sd zero, 0(sp)
+; RV64-NEXT: call g
+; RV64-NEXT: lui a0, 6
+; RV64-NEXT: addi a0, a0, -624
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, s0, -16
+; RV64-NEXT: .cfi_def_cfa sp, 16
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: .cfi_restore s0
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: f:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 16
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: addi a0, a0, 15
+; RV32-NEXT: andi a0, a0, -16
+; RV32-NEXT: sub a0, sp, a0
+; RV32-NEXT: lui a1, 1
+; RV32-NEXT: .LBB0_1: # %entry
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: blt a0, sp, .LBB0_1
+; RV32-NEXT: # %bb.2: # %entry
+; RV32-NEXT: mv sp, a0
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: sub t1, sp, a1
+; RV32-NEXT: lui t2, 1
+; RV32-NEXT: .LBB0_3: # %entry
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: sub sp, sp, t2
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: bne sp, t1, .LBB0_3
+; RV32-NEXT: # %bb.4: # %entry
+; RV32-NEXT: addi sp, sp, -2048
+; RV32-NEXT: addi sp, sp, -1456
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: call g
+; RV32-NEXT: lui a0, 6
+; RV32-NEXT: addi a0, a0, -592
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, s0, -16
+; RV32-NEXT: .cfi_def_cfa sp, 16
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: .cfi_restore s0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+entry:
+ %v = alloca i32, i64 %n
+ call void @g(ptr %v, [3000 x i64] poison)
+ ret void
+}
+
+declare void @g(ptr, [3000 x i64])
+
+attributes #0 = { "probe-stack"="inline-asm" }
More information about the llvm-branch-commits
mailing list