[llvm] [RISCV] Add stack probing in eliminateCallFramePseudoInstr (PR #139731)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 13 06:47:07 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Raphael Moreira Zinsly (rzinsly)
Stack clash protection was missing from
RISCVFrameLowering::eliminateCallFramePseudoInstr; calling allocateStack there fixes it.
This patch also fixes the tests in stack-probing-dynamic.ll, which are meant to test the stack allocation performed before a function call.
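For context, the fixed path fires when a function compiled with inline stack probing makes a call whose outgoing stack arguments are at least the probe size, so the SP decrement emitted for ADJCALLSTACKDOWN must itself be probed. A minimal sketch of such a trigger, assuming the "probe-stack"="inline-asm" function attribute used by these tests (the caller name is illustrative):

```llvm
; The [518 x i64] argument is large enough that its stack-passed portion
; reaches the 4096-byte probe size, so allocating the call frame must be
; probed rather than done with a single unchecked SP decrement.
declare void @callee_stack_args(ptr, [518 x i64])

define void @caller(i64 %n) "probe-stack"="inline-asm" {
entry:
  %v = alloca i32, i64 %n   ; variable-sized object: no reserved call frame
  call void @callee_stack_args(ptr %v, [518 x i64] poison)
  ret void
}
```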
---
Full diff: https://github.com/llvm/llvm-project/pull/139731.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+16-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll (+58-24)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 72bec74584059..b80608c05ad57 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1813,9 +1813,22 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
Amount = -Amount;
- const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
- RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
- MachineInstr::NoFlags, getStackAlign());
+ const RISCVTargetLowering *TLI =
+ MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
+ int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
+ if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
+ // When stack probing is enabled, the decrement of SP may need to be
+ // probed. We can handle both the decrement and the probing in
+ // allocateStack.
+ bool DynAllocation =
+ MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
+ allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+ /*NeedProbe=*/true, ProbeSize, DynAllocation);
+ } else {
+ const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
+ RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
+ MachineInstr::NoFlags, getStackAlign());
+ }
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..604271702ebad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -361,7 +361,7 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; If a function has variable-sized stack objects, then any function calls which
; need to pass arguments on the stack must allocate the stack space for them
; dynamically, to ensure they are at the bottom of the frame.
-define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
+define void @no_reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: no_reserved_call_frame:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -16
@@ -377,15 +377,20 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-NEXT: addi a0, a0, 15
; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
-; RV64I-NEXT: lui a2, 1
+; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: .LBB4_1: # %entry
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: sub sp, sp, a2
+; RV64I-NEXT: sub sp, sp, a1
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: blt a0, sp, .LBB4_1
; RV64I-NEXT: # %bb.2: # %entry
; RV64I-NEXT: mv sp, a0
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: sub sp, sp, a1
+; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee_stack_args
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: addi sp, s0, -16
; RV64I-NEXT: .cfi_def_cfa sp, 16
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -407,20 +412,27 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: addi s0, sp, 16
; RV32I-NEXT: .cfi_def_cfa s0, 0
-; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: slli a0, a0, 2
; RV32I-NEXT: addi a0, a0, 15
; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
-; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB4_1: # %entry
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: sub sp, sp, a2
+; RV32I-NEXT: sub sp, sp, a1
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: blt a0, sp, .LBB4_1
; RV32I-NEXT: # %bb.2: # %entry
; RV32I-NEXT: mv sp, a0
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: call callee_stack_args
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: addi sp, s0, -16
; RV32I-NEXT: .cfi_def_cfa sp, 16
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -432,48 +444,70 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV32I-NEXT: ret
entry:
%v = alloca i32, i64 %n
- call void @callee_stack_args(ptr %v, i32 %dummy)
+ call void @callee_stack_args(ptr %v, [518 x i64] poison)
ret void
}
; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
-define void @reserved_call_frame(i64 %n, i32 %dummy) #0 {
+define void @reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: reserved_call_frame:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -416
-; RV64I-NEXT: .cfi_def_cfa_offset 416
-; RV64I-NEXT: sd ra, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: addi a0, sp, 8
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 4096
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 4144
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: call callee_stack_args
-; RV64I-NEXT: ld ra, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: addiw a0, a0, 48
+; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: addi sp, sp, 416
+; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-LABEL: reserved_call_frame:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -416
-; RV32I-NEXT: .cfi_def_cfa_offset 416
-; RV32I-NEXT: sw ra, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: mv a1, a2
-; RV32I-NEXT: addi a0, sp, 12
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 4096
+; RV32I-NEXT: addi sp, sp, -80
+; RV32I-NEXT: .cfi_def_cfa_offset 4176
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: addi a0, a0, 36
+; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: call callee_stack_args
-; RV32I-NEXT: lw ra, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: addi a0, a0, 80
+; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: addi sp, sp, 416
+; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
entry:
- %v = alloca i32, i64 100
- call void @callee_stack_args(ptr %v, i32 %dummy)
+ %v = alloca i32, i64 518
+ call void @callee_stack_args(ptr %v, [518 x i64] poison)
ret void
}
-declare void @callee_stack_args(ptr, i32)
+declare void @callee_stack_args(ptr, [518 x i64])
; Dynamic allocation of vectors
define void @dynamic_vector(i64 %size, ptr %out) #0 {
``````````
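A note on the test changes: replacing the old `i32 %dummy` argument with `[518 x i64]` makes the outgoing-argument area reach the 4096-byte probe size. In the updated RV64I output this shows up as a one-page SP decrement plus probe (`lui a1, 1` / `sub sp, sp, a1` / `sd zero, 0(sp)`) immediately before `call callee_stack_args`, with a matching SP restore after the call; that is exactly the path now handled by allocateStack in eliminateCallFramePseudoInstr.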
https://github.com/llvm/llvm-project/pull/139731