[llvm] [RISCV] Fix musttail with indirect arguments by forwarding incoming pointers (PR #185094)
Xavier Roche via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 04:02:46 PST 2026
https://github.com/xroche updated https://github.com/llvm/llvm-project/pull/185094
>From 0166f898e46c6ece930361691af37ed80c4d26bf Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Fri, 6 Mar 2026 20:41:00 +0100
Subject: [PATCH 1/3] [RISCV] Fix musttail with indirect arguments by
forwarding incoming pointers
When a musttail call has arguments passed indirectly
(CCValAssign::Indirect), the current code creates a new stack
temporary and copies the data there. The tail call then deallocates
the caller's stack frame, leaving the pointer dangling.
Fix this by forwarding the original incoming indirect pointer
instead of re-spilling. Since musttail guarantees matching
prototypes, incoming and outgoing indirect args have a 1:1
correspondence, and the incoming pointer (from the caller's caller's
frame) remains valid after the tail call.
This also subsumes the earlier fix for non-musttail indirect args:
non-musttail tail calls with indirect args are rejected from tail
call optimization (the spill-slot pointer would dangle), while
musttail calls forward the incoming pointer instead.
Fixes #185089.
Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 118 +++++++++++-------
.../Target/RISCV/RISCVMachineFunctionInfo.h | 13 ++
.../CodeGen/RISCV/musttail-indirect-args.ll | 66 ++++++++++
llvm/test/CodeGen/RISCV/tail-calls.ll | 18 ++-
4 files changed, 167 insertions(+), 48 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 17d7db95886ab..68cff3332b873 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24454,6 +24454,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL, *this);
if (VA.getLocInfo() == CCValAssign::Indirect) {
+ // Save the incoming indirect pointer for musttail forwarding.
+ RVFI->addIncomingIndirectArg(ArgValue);
// If the original argument was split and passed by reference (e.g. i128
// on RV32), we need to load all parts of it here (using the same
// address). Vectors may be partly split to registers and partly to the
@@ -24578,6 +24580,19 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
if (CCInfo.getStackSize() > RVFI->getArgumentStackSize())
return false;
+ // Do not tail call optimize if any argument needs to be passed indirectly.
+ // The caller allocates stack space and passes a pointer to the callee. On a
+ // tail call the caller's stack frame is deallocated before the callee
+ // executes, invalidating the pointer (use-after-free).
+ // musttail is excluded: callers forward incoming indirect pointers that
+ // point to the caller's caller's frame, which remains valid.
+ if (!CLI.CB || !CLI.CB->isMustTailCall()) {
+ for (const auto &VA : ArgLocs) {
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ }
+ }
+
// Do not tail call opt if either caller or callee uses struct return
// semantics.
auto IsCallerStructRet = Caller.hasStructRetAttr();
@@ -24765,51 +24780,70 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
- // Store the argument in a stack slot and pass its address.
- Align StackAlign =
- std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
- getPrefTypeAlign(ArgValue.getValueType(), DAG));
- TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
- // If the original argument was split (e.g. i128), we need
- // to store the required parts of it here (and pass just one address).
- // Vectors may be partly split to registers and partly to the stack, in
- // which case the base address is partly offset and subsequent stores are
- // relative to that.
- unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
- unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
- assert(VA.getValVT().isVector() || ArgPartOffset == 0);
- // Calculate the total size to store. We don't have access to what we're
- // actually storing other than performing the loop and collecting the
- // info.
- SmallVector<std::pair<SDValue, SDValue>> Parts;
- while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
- SDValue PartValue = OutVals[OutIdx + 1];
- unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
- SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
- EVT PartVT = PartValue.getValueType();
- if (PartVT.isScalableVector())
- Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
- StoredSize += PartVT.getStoreSize();
- StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
- Parts.push_back(std::make_pair(PartValue, Offset));
- ++i;
- ++OutIdx;
- }
- SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MemOpChains.push_back(
- DAG.getStore(Chain, DL, ArgValue, SpillSlot,
- MachinePointerInfo::getFixedStack(MF, FI)));
- for (const auto &Part : Parts) {
- SDValue PartValue = Part.first;
- SDValue PartOffset = Part.second;
- SDValue Address =
- DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
+ // For musttail calls, forward the incoming indirect pointer instead
+ // of creating a new stack temporary. The incoming pointer points to
+ // the caller's caller's frame, which remains valid after a tail call.
+ if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
+ unsigned IndirectIdx = 0;
+ for (unsigned k = 0; k < OutIdx; ++k) {
+ if (ArgLocs[k].getLocInfo() == CCValAssign::Indirect)
+ ++IndirectIdx;
+ }
+ ArgValue = RVFI->getIncomingIndirectArg(IndirectIdx);
+ // Skip any split parts of this argument (they are covered by the
+ // forwarded pointer).
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
+ ++i;
+ ++OutIdx;
+ }
+ } else {
+ // Store the argument in a stack slot and pass its address.
+ Align StackAlign =
+ std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
+ getPrefTypeAlign(ArgValue.getValueType(), DAG));
+ TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
+ // If the original argument was split (e.g. i128), we need
+ // to store the required parts of it here (and pass just one address).
+ // Vectors may be partly split to registers and partly to the stack, in
+ // which case the base address is partly offset and subsequent stores
+ // are relative to that.
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
+ assert(VA.getValVT().isVector() || ArgPartOffset == 0);
+ // Calculate the total size to store. We don't have access to what
+ // we're actually storing other than performing the loop and collecting
+ // the info.
+ SmallVector<std::pair<SDValue, SDValue>> Parts;
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
+ SDValue PartValue = OutVals[OutIdx + 1];
+ unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
+ SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
+ EVT PartVT = PartValue.getValueType();
+ if (PartVT.isScalableVector())
+ Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
+ StoredSize += PartVT.getStoreSize();
+ StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
+ Parts.push_back(std::make_pair(PartValue, Offset));
+ ++i;
+ ++OutIdx;
+ }
+ SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
- DAG.getStore(Chain, DL, PartValue, Address,
+ DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
+ for (const auto &Part : Parts) {
+ SDValue PartValue = Part.first;
+ SDValue PartOffset = Part.second;
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, PartValue, Address,
+ MachinePointerInfo::getFixedStack(MF, FI)));
+ }
+ ArgValue = SpillSlot;
}
- ArgValue = SpillSlot;
} else {
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index e23f162a317ef..65b7226025da5 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -73,6 +73,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
/// Incoming ByVal arguments
SmallVector<SDValue, 8> IncomingByValArgs;
+ /// Incoming indirect argument pointers (for musttail forwarding)
+ SmallVector<SDValue, 4> IncomingIndirectArgs;
+
/// Is there any vector argument or return?
bool IsVectorCall = false;
@@ -157,6 +160,16 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
SDValue getIncomingByValArgs(unsigned Idx) { return IncomingByValArgs[Idx]; }
unsigned getIncomingByValArgsSize() const { return IncomingByValArgs.size(); }
+ void addIncomingIndirectArg(SDValue Val) {
+ IncomingIndirectArgs.push_back(Val);
+ }
+ SDValue getIncomingIndirectArg(unsigned Idx) {
+ return IncomingIndirectArgs[Idx];
+ }
+ unsigned getIncomingIndirectArgsSize() const {
+ return IncomingIndirectArgs.size();
+ }
+
enum class PushPopKind { None = 0, StdExtZcmp, VendorXqccmp };
PushPopKind getPushPopKind(const MachineFunction &MF) const;
diff --git a/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll b/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
new file mode 100644
index 0000000000000..447cacf13e2b8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64
+
+; Test that musttail with indirect args (fp128 on RV32) forwards the incoming
+; pointer instead of creating a new stack temporary. Without this fix, the
+; pointer would dangle after the tail call deallocates the caller's frame.
+
+declare i32 @callee_musttail_indirect(fp128 %a)
+
+; fp128 is indirect on RV32 (too large for registers), direct on RV64.
+; On RV32, musttail must forward the incoming indirect pointer (a0) directly.
+define i32 @caller_musttail_indirect(fp128 %a) nounwind {
+; RV32-LABEL: caller_musttail_indirect:
+; RV32: # %bb.0:
+; RV32-NEXT: tail callee_musttail_indirect
+;
+; RV64-LABEL: caller_musttail_indirect:
+; RV64: # %bb.0:
+; RV64-NEXT: tail callee_musttail_indirect
+ %call = musttail call i32 @callee_musttail_indirect(fp128 %a)
+ ret i32 %call
+}
+
+; Verify that non-musttail tail call with indirect args does NOT tail call
+; (this is the PR #184972 fix - indirect args are unsafe for regular tail calls).
+define void @caller_no_musttail_indirect() nounwind {
+; RV32-LABEL: caller_no_musttail_indirect:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: lui a1, 262128
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: call callee_musttail_indirect
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: caller_no_musttail_indirect:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 16383
+; RV64-NEXT: slli a1, a1, 36
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: tail callee_musttail_indirect
+ %call = tail call i32 @callee_musttail_indirect(fp128 0xL00000000000000003FFF000000000000)
+ ret void
+}
+
+; Test musttail with i128 on RV32 (indirect, split into 4 x i32 parts).
+declare i64 @callee_musttail_i128(i128 %a)
+
+define i64 @caller_musttail_i128(i128 %a) nounwind {
+; RV32-LABEL: caller_musttail_i128:
+; RV32: # %bb.0:
+; RV32-NEXT: tail callee_musttail_i128
+;
+; RV64-LABEL: caller_musttail_i128:
+; RV64: # %bb.0:
+; RV64-NEXT: tail callee_musttail_i128
+ %call = musttail call i64 @callee_musttail_i128(i128 %a)
+ ret i64 %call
+}
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 33feba3c6fba1..79855aa03adcf 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -247,20 +247,24 @@ declare i32 @callee_indirect_args(fp128 %a)
define void @caller_indirect_args() nounwind {
; CHECK-LABEL: caller_indirect_args:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: lui a1, 262128
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: sw zero, 0(sp)
; CHECK-NEXT: sw zero, 4(sp)
; CHECK-NEXT: sw zero, 8(sp)
; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: tail callee_indirect_args
+; CHECK-NEXT: call callee_indirect_args
+; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
-; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
+; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
+; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0)
@@ -270,8 +274,10 @@ define void @caller_indirect_args() nounwind {
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp)
-; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
-; CHECK-LARGE-ZICFILP-NEXT: jr t2
+; CHECK-LARGE-ZICFILP-NEXT: jalr t2
+; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
+; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
%call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
ret void
>From d94c5a02c11d56c4f2469bd05ae44083d98f743d Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Sat, 7 Mar 2026 08:29:22 +0100
Subject: [PATCH 2/3] [RISCV] Address review: use DenseMap for indirect args,
add tests
- Use DenseMap<unsigned, SDValue> keyed by OrigArgIndex instead of
SmallVector for incoming indirect arg pointers. This avoids a fragile
O(n) counting loop in LowerCall and directly maps argument indices.
- Add test cases for: two indirect args (DenseMap multi-key), mixed
direct+indirect, and i128 split+trailing direct arg.
- Add test for non-musttail forwarding of indirect arg (shows it
correctly falls back to call).
Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +---
.../Target/RISCV/RISCVMachineFunctionInfo.h | 19 ++---
.../CodeGen/RISCV/musttail-indirect-args.ll | 77 +++++++++++++++++++
3 files changed, 91 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 68cff3332b873..e49628550bc5f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24454,8 +24454,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL, *this);
if (VA.getLocInfo() == CCValAssign::Indirect) {
- // Save the incoming indirect pointer for musttail forwarding.
- RVFI->addIncomingIndirectArg(ArgValue);
// If the original argument was split and passed by reference (e.g. i128
// on RV32), we need to load all parts of it here (using the same
// address). Vectors may be partly split to registers and partly to the
@@ -24464,6 +24462,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
+ // Save the incoming indirect pointer for musttail forwarding.
+ RVFI->setIncomingIndirectArg(ArgIndex, ArgValue);
unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
@@ -24784,15 +24784,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// of creating a new stack temporary. The incoming pointer points to
// the caller's caller's frame, which remains valid after a tail call.
if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
- unsigned IndirectIdx = 0;
- for (unsigned k = 0; k < OutIdx; ++k) {
- if (ArgLocs[k].getLocInfo() == CCValAssign::Indirect)
- ++IndirectIdx;
- }
- ArgValue = RVFI->getIncomingIndirectArg(IndirectIdx);
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ ArgValue = RVFI->getIncomingIndirectArg(ArgIndex);
// Skip any split parts of this argument (they are covered by the
// forwarded pointer).
- unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
++i;
++OutIdx;
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 65b7226025da5..9db41bc04094e 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -73,8 +74,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
/// Incoming ByVal arguments
SmallVector<SDValue, 8> IncomingByValArgs;
- /// Incoming indirect argument pointers (for musttail forwarding)
- SmallVector<SDValue, 4> IncomingIndirectArgs;
+ /// Incoming indirect argument pointers, keyed by OrigArgIndex.
+ /// Used for musttail forwarding of indirect args.
+ DenseMap<unsigned, SDValue> IncomingIndirectArgs;
/// Is there any vector argument or return?
bool IsVectorCall = false;
@@ -160,14 +162,13 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
SDValue getIncomingByValArgs(unsigned Idx) { return IncomingByValArgs[Idx]; }
unsigned getIncomingByValArgsSize() const { return IncomingByValArgs.size(); }
- void addIncomingIndirectArg(SDValue Val) {
- IncomingIndirectArgs.push_back(Val);
+ void setIncomingIndirectArg(unsigned ArgIndex, SDValue Val) {
+ IncomingIndirectArgs[ArgIndex] = Val;
}
- SDValue getIncomingIndirectArg(unsigned Idx) {
- return IncomingIndirectArgs[Idx];
- }
- unsigned getIncomingIndirectArgsSize() const {
- return IncomingIndirectArgs.size();
+ SDValue getIncomingIndirectArg(unsigned ArgIndex) const {
+ auto It = IncomingIndirectArgs.find(ArgIndex);
+ assert(It != IncomingIndirectArgs.end() && "No incoming indirect arg");
+ return It->second;
}
enum class PushPopKind { None = 0, StdExtZcmp, VendorXqccmp };
diff --git a/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll b/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
index 447cacf13e2b8..97816c4c41e9d 100644
--- a/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
+++ b/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
@@ -50,6 +50,67 @@ define void @caller_no_musttail_indirect() nounwind {
ret void
}
+; Verify that non-musttail tail call forwarding an indirect arg from the
+; caller's own parameters also does NOT tail call (the arg lives on the
+; caller's frame, which would be deallocated).
+define i32 @caller_no_musttail_forward_indirect(fp128 %a) nounwind {
+; RV32-LABEL: caller_no_musttail_forward_indirect:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw a1, 0(a0)
+; RV32-NEXT: lw a2, 4(a0)
+; RV32-NEXT: lw a3, 8(a0)
+; RV32-NEXT: lw a4, 12(a0)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a2, 4(sp)
+; RV32-NEXT: sw a3, 8(sp)
+; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: call callee_musttail_indirect
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: caller_no_musttail_forward_indirect:
+; RV64: # %bb.0:
+; RV64-NEXT: tail callee_musttail_indirect
+ %call = tail call i32 @callee_musttail_indirect(fp128 %a)
+ ret i32 %call
+}
+
+; Test musttail with two indirect fp128 args on RV32. Both pointers must be
+; forwarded. Exercises the DenseMap with two distinct OrigArgIndex values.
+declare i32 @callee_musttail_two_indirect(fp128 %a, fp128 %b)
+
+define i32 @caller_musttail_two_indirect(fp128 %a, fp128 %b) nounwind {
+; RV32-LABEL: caller_musttail_two_indirect:
+; RV32: # %bb.0:
+; RV32-NEXT: tail callee_musttail_two_indirect
+;
+; RV64-LABEL: caller_musttail_two_indirect:
+; RV64: # %bb.0:
+; RV64-NEXT: tail callee_musttail_two_indirect
+ %call = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %b)
+ ret i32 %call
+}
+
+; Test musttail with mixed direct (i32 in register) + indirect (fp128) args.
+; Confirms OrigArgIndex lookup works when not all args are indirect.
+declare i32 @callee_musttail_mixed(i32 %x, fp128 %a)
+
+define i32 @caller_musttail_mixed(i32 %x, fp128 %a) nounwind {
+; RV32-LABEL: caller_musttail_mixed:
+; RV32: # %bb.0:
+; RV32-NEXT: tail callee_musttail_mixed
+;
+; RV64-LABEL: caller_musttail_mixed:
+; RV64: # %bb.0:
+; RV64-NEXT: tail callee_musttail_mixed
+ %call = musttail call i32 @callee_musttail_mixed(i32 %x, fp128 %a)
+ ret i32 %call
+}
+
; Test musttail with i128 on RV32 (indirect, split into 4 x i32 parts).
declare i64 @callee_musttail_i128(i128 %a)
@@ -64,3 +125,19 @@ define i64 @caller_musttail_i128(i128 %a) nounwind {
%call = musttail call i64 @callee_musttail_i128(i128 %a)
ret i64 %call
}
+
+; Test musttail with i128 (indirect+split on RV32) plus a trailing i32 direct arg.
+; Exercises the split-skip logic followed by a normal register arg.
+declare i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x)
+
+define i64 @caller_musttail_i128_and_i32(i128 %a, i32 %x) nounwind {
+; RV32-LABEL: caller_musttail_i128_and_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: tail callee_musttail_i128_and_i32
+;
+; RV64-LABEL: caller_musttail_i128_and_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: tail callee_musttail_i128_and_i32
+ %call = musttail call i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x)
+ ret i64 %call
+}
>From ecb7952f4ef1f2eb475cc6d4f67055b08bc2c3a0 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Sat, 7 Mar 2026 13:02:31 +0100
Subject: [PATCH 3/3] [RISCV] Fix musttail indirect arg forwarding when
arguments are reordered
Outs[].OrigArgIndex is the position in the call's argument list (callee
perspective), but the incoming indirect arg map is keyed by the caller's
formal parameter index. When musttail reorders arguments (e.g.,
`musttail call @f(fp128 %b, fp128 %a)`), these indices diverge, causing
the wrong pointers to be forwarded.
Fix by resolving the caller's formal parameter index via the IR: walk
CLI.CB->args() to find the Argument at the matching call position and
use its getArgNo() as the DenseMap lookup key.
Add comprehensive tests for swapped, rotated, duplicated, and
stack-spilled indirect args.
Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 23 +-
.../CodeGen/RISCV/musttail-indirect-args.ll | 209 ++++++++++++++++++
2 files changed, 229 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e49628550bc5f..0dafa47f8cc00 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24784,11 +24784,28 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// of creating a new stack temporary. The incoming pointer points to
// the caller's caller's frame, which remains valid after a tail call.
if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
- unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
- ArgValue = RVFI->getIncomingIndirectArg(ArgIndex);
+ // Outs[OutIdx].OrigArgIndex is the position in the call's argument
+ // list (callee perspective), but the incoming indirect arg map is
+ // keyed by the caller's formal parameter index. When musttail
+ // reorders arguments, these differ. Resolve via the IR: find which
+ // formal parameter is being passed at this call position.
+ unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
+ unsigned FormalIdx = CallArgIdx; // default if lookup fails
+ unsigned Idx = 0;
+ for (const auto &CallArg : CLI.CB->args()) {
+ if (CallArg->getType()->isEmptyTy())
+ continue;
+ if (Idx == CallArgIdx) {
+ if (const auto *FormalArg = dyn_cast<Argument>(CallArg))
+ FormalIdx = FormalArg->getArgNo();
+ break;
+ }
+ ++Idx;
+ }
+ ArgValue = RVFI->getIncomingIndirectArg(FormalIdx);
// Skip any split parts of this argument (they are covered by the
// forwarded pointer).
- while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
++i;
++OutIdx;
}
diff --git a/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll b/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
index 97816c4c41e9d..c6d1743a27d26 100644
--- a/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
+++ b/llvm/test/CodeGen/RISCV/musttail-indirect-args.ll
@@ -141,3 +141,212 @@ define i64 @caller_musttail_i128_and_i32(i128 %a, i32 %x) nounwind {
%call = musttail call i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x)
ret i64 %call
}
+
+; Test musttail with two indirect args SWAPPED. The pointers must be exchanged
+; before the tail call. This exercises the OrigArgIndex -> Argument::getArgNo()
+; resolution in LowerCall.
+define i32 @caller_musttail_two_indirect_swapped(fp128 %a, fp128 %b) nounwind {
+; RV32-LABEL: caller_musttail_two_indirect_swapped:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: tail callee_musttail_two_indirect
+;
+; RV64-LABEL: caller_musttail_two_indirect_swapped:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a4, a1
+; RV64-NEXT: mv a5, a0
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: mv a1, a3
+; RV64-NEXT: mv a2, a5
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: tail callee_musttail_two_indirect
+ %call = musttail call i32 @callee_musttail_two_indirect(fp128 %b, fp128 %a)
+ ret i32 %call
+}
+
+; Test musttail with three indirect args rotated: call @f(%c, %a, %b).
+; All three pointers need to be shuffled.
+declare i32 @callee_musttail_three_indirect(fp128 %a, fp128 %b, fp128 %c)
+
+define i32 @caller_musttail_three_indirect_rotated(fp128 %a, fp128 %b, fp128 %c) nounwind {
+; RV32-LABEL: caller_musttail_three_indirect_rotated:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: mv a2, a3
+; RV32-NEXT: tail callee_musttail_three_indirect
+;
+; RV64-LABEL: caller_musttail_three_indirect_rotated:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a6, a3
+; RV64-NEXT: mv a7, a2
+; RV64-NEXT: mv a3, a1
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: mv a1, a5
+; RV64-NEXT: mv a4, a7
+; RV64-NEXT: mv a5, a6
+; RV64-NEXT: tail callee_musttail_three_indirect
+ %call = musttail call i32 @callee_musttail_three_indirect(fp128 %c, fp128 %a, fp128 %b)
+ ret i32 %call
+}
+
+; Test musttail with mixed direct + indirect args where the indirect args
+; are swapped but the direct arg stays in place.
+declare i32 @callee_musttail_mixed_two_indirect(i32 %x, fp128 %a, fp128 %b)
+
+define i32 @caller_musttail_mixed_swap_indirect(i32 %x, fp128 %a, fp128 %b) nounwind {
+; RV32-LABEL: caller_musttail_mixed_swap_indirect:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: mv a2, a3
+; RV32-NEXT: tail callee_musttail_mixed_two_indirect
+;
+; RV64-LABEL: caller_musttail_mixed_swap_indirect:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a5, a2
+; RV64-NEXT: mv a6, a1
+; RV64-NEXT: mv a1, a3
+; RV64-NEXT: mv a2, a4
+; RV64-NEXT: mv a3, a6
+; RV64-NEXT: mv a4, a5
+; RV64-NEXT: tail callee_musttail_mixed_two_indirect
+ %call = musttail call i32 @callee_musttail_mixed_two_indirect(i32 %x, fp128 %b, fp128 %a)
+ ret i32 %call
+}
+
+; Test musttail with swapped i128 on RV32 (split indirect args).
+declare i64 @callee_musttail_two_i128(i128 %a, i128 %b)
+
+define i64 @caller_musttail_two_i128_swapped(i128 %a, i128 %b) nounwind {
+; RV32-LABEL: caller_musttail_two_i128_swapped:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: tail callee_musttail_two_i128
+;
+; RV64-LABEL: caller_musttail_two_i128_swapped:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a4, a1
+; RV64-NEXT: mv a5, a0
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: mv a1, a3
+; RV64-NEXT: mv a2, a5
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: tail callee_musttail_two_i128
+ %call = musttail call i64 @callee_musttail_two_i128(i128 %b, i128 %a)
+ ret i64 %call
+}
+
+; Test musttail passing the same indirect arg to both positions.
+define i32 @caller_musttail_two_indirect_dup(fp128 %a, fp128 %b) nounwind {
+; RV32-LABEL: caller_musttail_two_indirect_dup:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: tail callee_musttail_two_indirect
+;
+; RV64-LABEL: caller_musttail_two_indirect_dup:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: mv a3, a1
+; RV64-NEXT: tail callee_musttail_two_indirect
+ %call = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %a)
+ ret i32 %call
+}
+
+; Test musttail with enough indirect args to spill to the stack (9 fp128 on
+; RV32 uses a0-a7 for the first 8 pointers, 9th goes on the stack).
+declare void @callee_musttail_nine_indirect(fp128, fp128, fp128, fp128, fp128, fp128, fp128, fp128, fp128)
+
+define void @caller_musttail_nine_indirect(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) nounwind {
+; RV32-LABEL: caller_musttail_nine_indirect:
+; RV32: # %bb.0:
+; RV32-NEXT: lw t0, 0(sp)
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: tail callee_musttail_nine_indirect
+;
+; RV64-LABEL: caller_musttail_nine_indirect:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: ld t0, 104(sp)
+; RV64-NEXT: ld t1, 96(sp)
+; RV64-NEXT: ld t2, 88(sp)
+; RV64-NEXT: ld t3, 80(sp)
+; RV64-NEXT: ld t4, 72(sp)
+; RV64-NEXT: ld t5, 64(sp)
+; RV64-NEXT: ld t6, 32(sp)
+; RV64-NEXT: ld s0, 40(sp)
+; RV64-NEXT: ld s1, 48(sp)
+; RV64-NEXT: ld s2, 56(sp)
+; RV64-NEXT: sd t6, 32(sp)
+; RV64-NEXT: sd s0, 40(sp)
+; RV64-NEXT: sd s1, 48(sp)
+; RV64-NEXT: sd s2, 56(sp)
+; RV64-NEXT: sd t5, 64(sp)
+; RV64-NEXT: sd t4, 72(sp)
+; RV64-NEXT: sd t3, 80(sp)
+; RV64-NEXT: sd t2, 88(sp)
+; RV64-NEXT: sd t1, 96(sp)
+; RV64-NEXT: sd t0, 104(sp)
+; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: tail callee_musttail_nine_indirect
+ musttail call void @callee_musttail_nine_indirect(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i)
+ ret void
+}
+
+; Test musttail swapping the first (register) and last (stack-spilled) args.
+define void @caller_musttail_nine_indirect_swap_first_last(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) nounwind {
+; RV32-LABEL: caller_musttail_nine_indirect_swap_first_last:
+; RV32: # %bb.0:
+; RV32-NEXT: lw t0, 0(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, t0
+; RV32-NEXT: tail callee_musttail_nine_indirect
+;
+; RV64-LABEL: caller_musttail_nine_indirect_swap_first_last:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: ld t0, 96(sp)
+; RV64-NEXT: ld t1, 104(sp)
+; RV64-NEXT: ld t2, 88(sp)
+; RV64-NEXT: ld t3, 80(sp)
+; RV64-NEXT: ld t4, 72(sp)
+; RV64-NEXT: ld t5, 64(sp)
+; RV64-NEXT: ld t6, 32(sp)
+; RV64-NEXT: ld s0, 40(sp)
+; RV64-NEXT: ld s1, 48(sp)
+; RV64-NEXT: ld s2, 56(sp)
+; RV64-NEXT: sd t6, 32(sp)
+; RV64-NEXT: sd s0, 40(sp)
+; RV64-NEXT: sd s1, 48(sp)
+; RV64-NEXT: sd s2, 56(sp)
+; RV64-NEXT: sd t5, 64(sp)
+; RV64-NEXT: sd t4, 72(sp)
+; RV64-NEXT: sd t3, 80(sp)
+; RV64-NEXT: sd t2, 88(sp)
+; RV64-NEXT: sd a0, 96(sp)
+; RV64-NEXT: sd a1, 104(sp)
+; RV64-NEXT: mv a0, t0
+; RV64-NEXT: mv a1, t1
+; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: tail callee_musttail_nine_indirect
+ musttail call void @callee_musttail_nine_indirect(fp128 %i, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %a)
+ ret void
+}
More information about the llvm-commits
mailing list