[llvm-branch-commits] [llvm] release/22.x v2: [CodeGen][RISCV] Inline stack probes immediately after allocateStack in eliminateCallFramePseudoInstr (#195456) (PR #202882)

Wed Jun 10 01:10:43 PDT 2026

llvmorg-github-actions[bot] wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Rong "Mantle" Bao (CSharperMantle)

<details>
<summary>Changes</summary>

Backport of commit 589faedadf141e5e63f7a1e92a0327fc9bdc9b09 (#195456) to the release/22.x branch.

Supersedes #202821.

---
Full diff: https://github.com/llvm/llvm-project/pull/202882.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+1) 
- (added) llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll (+115) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 8246623e8e5aa..20b43538d69c4 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1898,6 +1898,7 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
                       needsDwarfCFI(MF) && !hasFP(MF),
                       /*NeedProbe=*/true, ProbeSize, DynAllocation,
                       MachineInstr::NoFlags);
+        inlineStackProbe(MF, MBB);
       } else {
         const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
         RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
diff --git a/llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll b/llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll
new file mode 100644
index 0000000000000..4c8bb653b4cff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-probing-dynamic-nonentry.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s -check-prefix=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s | FileCheck %s -check-prefix=RV32
+
+; Test that very large outgoing call frames in functions with variable-sized
+; objects get proper stack probing. The outgoing args are large enough to force
+; the PROBED_STACKALLOC path, which must be expanded in a non-entry block.
+
+define void @f(i64 %n) #0 {
+; RV64-LABEL: f:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd zero, 0(sp)
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 16
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    addi a0, a0, 15
+; RV64-NEXT:    andi a0, a0, -16
+; RV64-NEXT:    sub a0, sp, a0
+; RV64-NEXT:    lui a1, 1
+; RV64-NEXT:  .LBB0_1: # %entry
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    sd zero, 0(sp)
+; RV64-NEXT:    blt a0, sp, .LBB0_1
+; RV64-NEXT:  # %bb.2: # %entry
+; RV64-NEXT:    mv sp, a0
+; RV64-NEXT:    lui a1, 5
+; RV64-NEXT:    sub t1, sp, a1
+; RV64-NEXT:    lui t2, 1
+; RV64-NEXT:  .LBB0_3: # %entry
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    sub sp, sp, t2
+; RV64-NEXT:    sd zero, 0(sp)
+; RV64-NEXT:    bne sp, t1, .LBB0_3
+; RV64-NEXT:  # %bb.4: # %entry
+; RV64-NEXT:    addi sp, sp, -2048
+; RV64-NEXT:    addi sp, sp, -1424
+; RV64-NEXT:    sd zero, 0(sp)
+; RV64-NEXT:    call g
+; RV64-NEXT:    lui a0, 6
+; RV64-NEXT:    addi a0, a0, -624
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, s0, -16
+; RV64-NEXT:    .cfi_def_cfa sp, 16
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    .cfi_restore s0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    .cfi_def_cfa_offset 0
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: f:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 0(sp)
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 16
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    addi a0, a0, 15
+; RV32-NEXT:    andi a0, a0, -16
+; RV32-NEXT:    sub a0, sp, a0
+; RV32-NEXT:    lui a1, 1
+; RV32-NEXT:  .LBB0_1: # %entry
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    sw zero, 0(sp)
+; RV32-NEXT:    blt a0, sp, .LBB0_1
+; RV32-NEXT:  # %bb.2: # %entry
+; RV32-NEXT:    mv sp, a0
+; RV32-NEXT:    lui a1, 5
+; RV32-NEXT:    sub t1, sp, a1
+; RV32-NEXT:    lui t2, 1
+; RV32-NEXT:  .LBB0_3: # %entry
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    sub sp, sp, t2
+; RV32-NEXT:    sw zero, 0(sp)
+; RV32-NEXT:    bne sp, t1, .LBB0_3
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    addi sp, sp, -2048
+; RV32-NEXT:    addi sp, sp, -1456
+; RV32-NEXT:    sw zero, 0(sp)
+; RV32-NEXT:    call g
+; RV32-NEXT:    lui a0, 6
+; RV32-NEXT:    addi a0, a0, -592
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, s0, -16
+; RV32-NEXT:    .cfi_def_cfa sp, 16
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+entry:
+  %v = alloca i32, i64 %n
+  call void @g(ptr %v, [3000 x i64] poison)
+  ret void
+}
+
+declare void @g(ptr, [3000 x i64])
+
+attributes #0 = { "probe-stack"="inline-asm" }

``````````

</details>


https://github.com/llvm/llvm-project/pull/202882