[llvm] [RISCV] improve `musttail` support (PR #170547)
Folkert de Vries via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 13:33:00 PST 2025
https://github.com/folkertdev updated https://github.com/llvm/llvm-project/pull/170547
From 2865ca1ce2102f80a4bcefc977e9ff6bb26f7977 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 3 Dec 2025 22:24:20 +0100
Subject: [PATCH 1/5] deduplicate `addTokenForArgument`
---
llvm/include/llvm/CodeGen/TargetLowering.h | 6 ++++
.../CodeGen/SelectionDAG/TargetLowering.cpp | 30 ++++++++++++++++
.../Target/AArch64/AArch64ISelLowering.cpp | 31 -----------------
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 6 ----
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 34 -------------------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 5 ---
6 files changed, 36 insertions(+), 76 deletions(-)
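Both backends carried an identical copy of this helper, so it now lives on TargetLowering where any target's LowerCall hook can reuse it. A minimal sketch of the intended call pattern, loosely following what the RISC-V lowering does in the next patch (illustrative only; OpSize, Offset, ArgValue and MemOpChains are placeholders, not code from this patch):

  // When a tail call writes an outgoing argument into the caller's own
  // incoming-argument area, create a fixed stack object for that slot ...
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, /*IsImmutable=*/true);
  SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
  // ... chain in any loads of incoming stack arguments that overlap it, so
  // the store below cannot clobber them before they are read ...
  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
  // ... and only then emit the store of the outgoing argument.
  MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, DstAddr,
                                     MachinePointerInfo::getFixedStack(MF, FI)));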
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index b2697c81fd825..9c1ed8365961f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4999,6 +4999,12 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
llvm_unreachable("Not Implemented");
}
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents
+ /// an upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo &MFI, int ClobberedFI) const;
+
/// Target-specific cleanup for formal ByVal parameters.
virtual void HandleByVal(CCState *, unsigned &, Align) const {}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 521d8f07434e6..ec1ce8bfa7ef3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -115,6 +115,36 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
return true;
}
+SDValue TargetLowering::addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo &MFI,
+ int ClobberedFI) const {
+ SmallVector<SDValue, 8> ArgChains;
+ int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps the legalizer find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each load of a stack argument that overlaps the
+ // clobbered frame object.
+ for (SDNode *U : DAG.getEntryNode().getNode()->users())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0) {
+ int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
+ int64_t InLastByte = InFirstByte;
+ InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
+
+ if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
+ (FirstByte <= InFirstByte && InFirstByte <= LastByte))
+ ArgChains.push_back(SDValue(L, 1));
+ }
+
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
+}
+
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6072fd9d8f242..dbc534b269393 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9353,37 +9353,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return true;
}
-SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
- SelectionDAG &DAG,
- MachineFrameInfo &MFI,
- int ClobberedFI) const {
- SmallVector<SDValue, 8> ArgChains;
- int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
- int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
-
- // Include the original chain at the beginning of the list. When this is
- // used by target LowerCall hooks, this helps legalize find the
- // CALLSEQ_BEGIN node.
- ArgChains.push_back(Chain);
-
- // Add a chain value for each stack argument corresponding
- for (SDNode *U : DAG.getEntryNode().getNode()->users())
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
- if (FI->getIndex() < 0) {
- int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
- int64_t InLastByte = InFirstByte;
- InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
-
- if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
- (FirstByte <= InFirstByte && InFirstByte <= LastByte))
- ArgChains.push_back(SDValue(L, 1));
- }
-
- // Build a tokenfactor for all the chains.
- return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
-}
-
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return (CallCC == CallingConv::Fast && TailCallOpt) ||
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 32aa913181a21..8f62af30d8c63 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -630,12 +630,6 @@ class AArch64TargetLowering : public TargetLowering {
bool
isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
- /// Finds the incoming stack arguments which overlap the given fixed stack
- /// object and incorporates their load into the current chain. This prevents
- /// an upcoming store from clobbering the stack argument before it's used.
- SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
- MachineFrameInfo &MFI, int ClobberedFI) const;
-
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 971dfdbe3e70a..963e32f7557a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1355,40 +1355,6 @@ CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
}
-SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
- SelectionDAG &DAG,
- MachineFrameInfo &MFI,
- int ClobberedFI) const {
- SmallVector<SDValue, 8> ArgChains;
- int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
- int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
-
- // Include the original chain at the beginning of the list. When this is
- // used by target LowerCall hooks, this helps legalize find the
- // CALLSEQ_BEGIN node.
- ArgChains.push_back(Chain);
-
- // Add a chain value for each stack argument corresponding
- for (SDNode *U : DAG.getEntryNode().getNode()->users()) {
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
- if (FI->getIndex() < 0) {
- int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
- int64_t InLastByte = InFirstByte;
- InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
-
- if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
- (FirstByte <= InFirstByte && InFirstByte <= LastByte))
- ArgChains.push_back(SDValue(L, 1));
- }
- }
- }
- }
-
- // Build a tokenfactor for all the chains.
- return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
-}
-
SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals,
StringRef Reason) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 9c0eff99981cd..435c917a29456 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -255,11 +255,6 @@ class AMDGPUTargetLowering : public TargetLowering {
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
- SDValue addTokenForArgument(SDValue Chain,
- SelectionDAG &DAG,
- MachineFrameInfo &MFI,
- int ClobberedFI) const;
-
SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals,
StringRef Reason) const;
From 2945d996c17b4c8827febed9ff73d3162e269005 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 19 Nov 2025 23:39:24 +0100
Subject: [PATCH 2/5] riscv: improve musttail based on loongarch code
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 49 ++++++++----
.../Target/RISCV/RISCVMachineFunctionInfo.h | 8 ++
llvm/test/CodeGen/RISCV/tail-calls.ll | 80 ++++++++-----------
3 files changed, 73 insertions(+), 64 deletions(-)
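The practical effect: stack-passed arguments no longer disqualify a tail call, as long as the callee's stack arguments fit inside the argument area our own caller already set up for us. A rough IR example of a call that previously had to stay a normal call and can now lower to a plain `tail` jump (sketch only; the real coverage is the musttail.ll test added in the next patch):

  declare i32 @callee(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)

  ; ten i32 arguments: the last two travel on the stack on both riscv32 and
  ; riscv64, reusing the slots our caller already allocated for %8 and %9
  define i32 @caller(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4,
                     i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
    %r = musttail call i32 @callee(i32 0, i32 1, i32 2, i32 3, i32 4,
                                   i32 5, i32 6, i32 7, i32 8, i32 9)
    ret i32 %r
  }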
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index be53f51afe79f..77809c3c562a4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23420,6 +23420,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
+ RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
switch (CallConv) {
default:
@@ -23608,6 +23609,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
}
+ RVFI->setArgumentStackSize(CCInfo.getStackSize());
+
// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens for vararg functions.
if (!OutChains.empty()) {
@@ -23629,6 +23632,7 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
auto CallerCC = Caller.getCallingConv();
+ auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
// Exception-handling functions need a special set of instructions to
// indicate a return to the hardware. Tail-calling another function would
@@ -23638,8 +23642,9 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
if (Caller.hasFnAttribute("interrupt"))
return false;
- // Do not tail call opt if the stack is used to pass parameters.
- if (CCInfo.getStackSize() != 0)
+ // If the stack arguments for this call do not fit into our own save area,
+ // the call cannot be made a tail call.
+ if (CCInfo.getStackSize() > RVFI->getArgumentStackSize())
return false;
// Do not tail call opt if any parameters need to be passed indirectly.
@@ -23658,7 +23663,7 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
// semantics.
auto IsCallerStructRet = Caller.hasStructRetAttr();
auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
- if (IsCallerStructRet || IsCalleeStructRet)
+ if (IsCallerStructRet != IsCalleeStructRet)
return false;
// The callee has to preserve all registers the caller needs to preserve.
@@ -23670,12 +23675,12 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
return false;
}
- // Byval parameters hand the function a pointer directly into the stack area
- // we want to reuse during a tail call. Working around this *is* possible
- // but less efficient and uglier in LowerCall.
- for (auto &Arg : Outs)
- if (Arg.Flags.isByVal())
- return false;
+ // Outgoing arguments passed in callee-saved registers must be the exact
+ // values this function received in those registers; otherwise restoring
+ // those registers before the tail call would clobber the arguments.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
+ return false;
return true;
}
@@ -23871,20 +23876,32 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
} else {
assert(VA.isMemLoc() && "Argument not register or memory");
- assert(!IsTailCall && "Tail call not allowed if stack is used "
- "for passing parameters");
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
+ int32_t Offset = VA.getLocMemOffset();
// Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
- SDValue Address =
- DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
- DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
+
+ if (IsTailCall) {
+ unsigned OpSize = (VA.getValVT().getSizeInBits() + 7) / 8;
+ int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
+ DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ // Make sure any stack arguments overlapping with where we're storing
+ // are loaded before the store; otherwise they'll be clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
+ DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
+ DstInfo = MachinePointerInfo::getStack(MF, Offset);
+ }
// Emit the store.
MemOpChains.push_back(
- DAG.getStore(Chain, DL, ArgValue, Address,
- MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
+ DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index f9be80feae211..b1df2a707081d 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -65,6 +65,11 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
uint64_t RVVPadding = 0;
/// Size of stack frame to save callee saved registers
unsigned CalleeSavedStackSize = 0;
+
+ /// ArgumentStackSize - number of bytes of stack consumed by the arguments
+ /// being passed on the stack.
+ unsigned ArgumentStackSize = 0;
+
/// Is there any vector argument or return?
bool IsVectorCall = false;
@@ -142,6 +147,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
+ unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+ void setArgumentStackSize(unsigned Size) { ArgumentStackSize = Size; }
+
enum class PushPopKind { None = 0, StdExtZcmp, VendorXqccmp };
PushPopKind getPushPopKind(const MachineFunction &MF) const;
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 6756fea8a1f85..8f63e320655c9 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -204,49 +204,39 @@ declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %
define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
; CHECK-LABEL: caller_args:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw t0, 32(sp)
-; CHECK-NEXT: lw t1, 36(sp)
-; CHECK-NEXT: lw t2, 40(sp)
-; CHECK-NEXT: lw t3, 44(sp)
-; CHECK-NEXT: lw t4, 48(sp)
-; CHECK-NEXT: lw t5, 52(sp)
-; CHECK-NEXT: sw t4, 16(sp)
-; CHECK-NEXT: sw t5, 20(sp)
+; CHECK-NEXT: lw t0, 0(sp)
+; CHECK-NEXT: lw t1, 20(sp)
+; CHECK-NEXT: lw t2, 4(sp)
+; CHECK-NEXT: lw t3, 8(sp)
+; CHECK-NEXT: lw t4, 12(sp)
+; CHECK-NEXT: lw t5, 16(sp)
+; CHECK-NEXT: sw t2, 4(sp)
+; CHECK-NEXT: sw t3, 8(sp)
+; CHECK-NEXT: sw t4, 12(sp)
+; CHECK-NEXT: sw t5, 16(sp)
+; CHECK-NEXT: sw t1, 20(sp)
; CHECK-NEXT: sw t0, 0(sp)
-; CHECK-NEXT: sw t1, 4(sp)
-; CHECK-NEXT: sw t2, 8(sp)
-; CHECK-NEXT: sw t3, 12(sp)
-; CHECK-NEXT: call callee_args
-; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
-; CHECK-NEXT: ret
+; CHECK-NEXT: tail callee_args
;
; CHECK-LARGE-ZICFILP-LABEL: caller_args:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
-; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
-; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp)
-; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp)
-; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp)
-; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp)
-; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp)
-; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw t0, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw t1, 20(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw t2, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw t3, 16(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw t4, 12(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw t5, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t2, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8:
; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
-; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t5, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp)
-; CHECK-LARGE-ZICFILP-NEXT: jalr t2
-; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
-; CHECK-LARGE-ZICFILP-NEXT: ret
+; CHECK-LARGE-ZICFILP-NEXT: sw t3, 16(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t1, 20(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
%r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
ret i32 %r
@@ -410,30 +400,24 @@ define i32 @caller_byval() nounwind {
; CHECK-LABEL: caller_byval:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 8(sp)
-; CHECK-NEXT: sw a0, 4(sp)
-; CHECK-NEXT: addi a0, sp, 4
-; CHECK-NEXT: call callee_byval
-; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw a1, 12(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: sw a1, 8(sp)
; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; CHECK-NEXT: tail callee_byval
;
; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
-; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-LARGE-ZICFILP-NEXT: lw a0, 8(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw a0, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lw a1, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi12:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI10_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
-; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 4
-; CHECK-LARGE-ZICFILP-NEXT: jalr t2
-; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 8
+; CHECK-LARGE-ZICFILP-NEXT: sw a1, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
-; CHECK-LARGE-ZICFILP-NEXT: ret
+; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
%a = alloca ptr
%r = tail call i32 @callee_byval(ptr byval(ptr) %a)
From 6c75b246ea9817c2ab32943d5811f02d9172b0c8 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 19 Nov 2025 23:39:53 +0100
Subject: [PATCH 3/5] riscv: add musttail test
---
llvm/test/CodeGen/RISCV/musttail.ll | 395 ++++++++++++++++++++++++++++
1 file changed, 395 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/musttail.ll
diff --git a/llvm/test/CodeGen/RISCV/musttail.ll b/llvm/test/CodeGen/RISCV/musttail.ll
new file mode 100644
index 0000000000000..32f9ac7fef470
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/musttail.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64
+
+declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
+
+define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
+; RV32-LABEL: many_args_tail:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 9
+; RV32-NEXT: li t0, 8
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a6, 6
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: li a7, 7
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: tail many_args_callee
+;
+; RV64-LABEL: many_args_tail:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 9
+; RV64-NEXT: li t0, 8
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 6
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: li a7, 7
+; RV64-NEXT: sd t0, 0(sp)
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: tail many_args_callee
+ %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
+ ret i32 %ret
+}
+
+define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
+; RV32-LABEL: many_args_musttail:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 9
+; RV32-NEXT: li t0, 8
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a6, 6
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: li a7, 7
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: tail many_args_callee
+;
+; RV64-LABEL: many_args_musttail:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 9
+; RV64-NEXT: li t0, 8
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 6
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: li a7, 7
+; RV64-NEXT: sd t0, 0(sp)
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: tail many_args_callee
+ %ret = musttail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
+ ret i32 %ret
+}
+
+; This function has more arguments than its tail-callee. This isn't valid for
+; the musttail attribute, but can still be tail-called as a non-guaranteed
+; optimisation, because the outgoing arguments to @many_args_callee fit in the
+; stack space allocated by the caller of @more_args_tail.
+define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
+; RV32-LABEL: more_args_tail:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 9
+; RV32-NEXT: li t0, 8
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a6, 6
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: li a7, 7
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: tail many_args_callee
+;
+; RV64-LABEL: more_args_tail:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 9
+; RV64-NEXT: li t0, 8
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 6
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: li a7, 7
+; RV64-NEXT: sd t0, 0(sp)
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: tail many_args_callee
+ %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
+ ret i32 %ret
+}
+
+; Again, this isn't valid for musttail, but can be tail-called in practice
+; because the stack size is the same.
+define i32 @different_args_tail_32bit(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4) {
+; RV32-LABEL: different_args_tail_32bit:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 9
+; RV32-NEXT: li t0, 8
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a6, 6
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: li a7, 7
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: tail many_args_callee
+;
+; RV64-LABEL: different_args_tail_32bit:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: .cfi_def_cfa_offset 32
+; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: li a0, 9
+; RV64-NEXT: li t0, 8
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 6
+; RV64-NEXT: li a7, 7
+; RV64-NEXT: sd t0, 0(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: call many_args_callee
+; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+ %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
+ ret i32 %ret
+}
+
+define i32 @different_args_tail_64bit(i128 %0, i128 %1, i128 %2, i128 %3, i128 %4) {
+; RV32-LABEL: different_args_tail_64bit:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: li a0, 9
+; RV32-NEXT: li t0, 8
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a6, 6
+; RV32-NEXT: li a7, 7
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: call many_args_callee
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: different_args_tail_64bit:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 9
+; RV64-NEXT: li t0, 8
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 6
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: li a7, 7
+; RV64-NEXT: sd t0, 0(sp)
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: tail many_args_callee
+ %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
+ ret i32 %ret
+}
+
+; Here, the caller requires less stack space for its arguments than the
+; callee, so it would not be valid to do a tail-call.
+define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+; RV32-LABEL: fewer_args_tail:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: li a0, 9
+; RV32-NEXT: li t0, 8
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: li a5, 5
+; RV32-NEXT: li a6, 6
+; RV32-NEXT: li a7, 7
+; RV32-NEXT: sw t0, 0(sp)
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: call many_args_callee
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: fewer_args_tail:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: .cfi_def_cfa_offset 32
+; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: li a0, 9
+; RV64-NEXT: li t0, 8
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 6
+; RV64-NEXT: li a7, 7
+; RV64-NEXT: sd t0, 0(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: call many_args_callee
+; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+ %ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
+ ret i32 %ret
+}
+
+declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) nounwind {
+; RV32-LABEL: bar:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a7
+; RV32-NEXT: mv s1, a6
+; RV32-NEXT: mv s2, a5
+; RV32-NEXT: mv s3, a4
+; RV32-NEXT: mv s4, a3
+; RV32-NEXT: mv s5, a2
+; RV32-NEXT: mv s6, a1
+; RV32-NEXT: mv s7, a0
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, s7
+; RV32-NEXT: call foo
+; RV32-NEXT: li a0, 2
+; RV32-NEXT: sw a0, 48(sp)
+; RV32-NEXT: mv a0, s7
+; RV32-NEXT: mv a1, s6
+; RV32-NEXT: mv a2, s5
+; RV32-NEXT: mv a3, s4
+; RV32-NEXT: mv a4, s3
+; RV32-NEXT: mv a5, s2
+; RV32-NEXT: mv a6, s1
+; RV32-NEXT: mv a7, s0
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: tail foo
+;
+; RV64-LABEL: bar:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -80
+; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: mv s0, a7
+; RV64-NEXT: mv s1, a6
+; RV64-NEXT: mv s2, a5
+; RV64-NEXT: mv s3, a4
+; RV64-NEXT: mv s4, a3
+; RV64-NEXT: mv s5, a2
+; RV64-NEXT: mv s6, a1
+; RV64-NEXT: mv s7, a0
+; RV64-NEXT: li a0, 1
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: mv a0, s7
+; RV64-NEXT: call foo
+; RV64-NEXT: li a0, 2
+; RV64-NEXT: sd a0, 80(sp)
+; RV64-NEXT: mv a0, s7
+; RV64-NEXT: mv a1, s6
+; RV64-NEXT: mv a2, s5
+; RV64-NEXT: mv a3, s4
+; RV64-NEXT: mv a4, s3
+; RV64-NEXT: mv a5, s2
+; RV64-NEXT: mv a6, s1
+; RV64-NEXT: mv a7, s0
+; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 80
+; RV64-NEXT: tail foo
+entry:
+ call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 1)
+ musttail call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 2)
+ ret void
+}
+
+declare void @sret_callee(ptr sret({ double, double }) align 8)
+
+; Functions which return by sret can be tail-called because the incoming sret
+; pointer gets passed through to the callee.
+define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
+; RV32-LABEL: sret_caller_tail:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: tail sret_callee
+;
+; RV64-LABEL: sret_caller_tail:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: tail sret_callee
+entry:
+ tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
+ ret void
+}
+
+define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
+; RV32-LABEL: sret_caller_musttail:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: tail sret_callee
+;
+; RV64-LABEL: sret_caller_musttail:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: tail sret_callee
+entry:
+ musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
+ ret void
+}
From cab416d7bdff467ba2a6e8c4e19042877e0bf5f1 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 3 Dec 2025 19:40:53 +0100
Subject: [PATCH 4/5] riscv: support byval tail call arguments
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 63 ++++---
.../Target/RISCV/RISCVMachineFunctionInfo.h | 7 +
llvm/test/CodeGen/RISCV/musttail.ll | 176 ++++++++++++++++++
llvm/test/CodeGen/RISCV/tail-calls.ll | 40 ++--
4 files changed, 242 insertions(+), 44 deletions(-)
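With this patch a byval argument no longer forces the call out of tail position: for a tail call, the outgoing byval value is copied straight into the caller's own incoming byval slot (when the source is a global, external symbol or frame object) instead of into a fresh stack temporary that would be dead once the stack pointer is reused. A small sketch of the pattern this enables, written with opaque pointers and mirroring the large_caller test added below:

  %twenty_bytes = type { [5 x i32] }
  declare void @large_callee(ptr byval(%twenty_bytes) align 4)

  define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
    ; the incoming byval area is simply forwarded, so this lowers to a
    ; single `tail large_callee`
    musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
    ret void
  }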
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 77809c3c562a4..52cc3f613454a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23549,6 +23549,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
continue;
}
InVals.push_back(ArgValue);
+ if (Ins[InsIdx].Flags.isByVal())
+ RVFI->addIncomingByValArgs(ArgValue);
}
if (any_of(ArgLocs,
@@ -23561,7 +23563,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
// Size of the vararg save area. For now, the varargs save area is either
// zero or large enough to hold a0-a7.
@@ -23647,18 +23648,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
if (CCInfo.getStackSize() > RVFI->getArgumentStackSize())
return false;
- // Do not tail call opt if any parameters need to be passed indirectly.
- // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
- // passed indirectly. So the address of the value will be passed in a
- // register, or if not available, then the address is put on the stack. In
- // order to pass indirectly, space on the stack often needs to be allocated
- // in order to store the value. In this case the CCInfo.getNextStackOffset()
- // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
- // are passed CCValAssign::Indirect.
- for (auto &VA : ArgLocs)
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
-
// Do not tail call opt if either caller or callee uses struct return
// semantics.
auto IsCallerStructRet = Caller.hasStructRetAttr();
@@ -23666,6 +23655,16 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
if (IsCallerStructRet != IsCalleeStructRet)
return false;
+ // Do not tail call opt if caller's and callee's byval arguments do not match.
+ for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
+ if (!Outs[i].Flags.isByVal())
+ continue;
+ if (j++ >= RVFI->getIncomingByValArgsSize())
+ return false;
+ if (RVFI->getIncomingByValArgs(j - 1).getValueType() != Outs[i].ArgVT)
+ return false;
+ }
+
// The callee has to preserve all registers the caller needs to preserve.
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
@@ -23709,6 +23708,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
const CallBase *CB = CLI.CB;
MachineFunction &MF = DAG.getMachineFunction();
+ RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
MachineFunction::CallSiteInfo CSInfo;
// Set type id for call site info.
@@ -23743,7 +23743,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Create local copies for byval args
SmallVector<SDValue, 8> ByValArgs;
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (!Flags.isByVal())
continue;
@@ -23752,16 +23752,27 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned Size = Flags.getByValSize();
Align Alignment = Flags.getNonZeroByValAlign();
- int FI =
- MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
- SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
+ SDValue Dst;
- Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
- /*IsVolatile=*/false,
- /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
- MachinePointerInfo(), MachinePointerInfo());
- ByValArgs.push_back(FIPtr);
+ if (IsTailCall) {
+ SDValue CallerArg = RVFI->getIncomingByValArgs(j++);
+ if (isa<GlobalAddressSDNode>(Arg) || isa<ExternalSymbolSDNode>(Arg) ||
+ isa<FrameIndexSDNode>(Arg))
+ Dst = CallerArg;
+ } else {
+ int FI =
+ MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
+ Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ }
+ if (Dst) {
+ Chain =
+ DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
+ /*IsVolatile=*/false,
+ /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(Dst);
+ }
}
if (!IsTailCall)
@@ -23864,8 +23875,12 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Use local copy if it is a byval arg.
- if (Flags.isByVal())
- ArgValue = ByValArgs[j++];
+ if (Flags.isByVal()) {
+ if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
+ isa<ExternalSymbolSDNode>(ArgValue) ||
+ isa<FrameIndexSDNode>(ArgValue)))
+ ArgValue = ByValArgs[j++];
+ }
if (VA.isRegLoc()) {
// Queue up the argument copies and emit them at the end.
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index b1df2a707081d..9c2cd708f2784 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -70,6 +70,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
/// being passed on the stack
unsigned ArgumentStackSize = 0;
+ /// Incoming ByVal arguments
+ SmallVector<SDValue, 8> IncomingByValArgs;
+
/// Is there any vector argument or return?
bool IsVectorCall = false;
@@ -150,6 +153,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+ void addIncomingByValArgs(SDValue Val) { IncomingByValArgs.push_back(Val); }
+ SDValue &getIncomingByValArgs(int Idx) { return IncomingByValArgs[Idx]; }
+ unsigned getIncomingByValArgsSize() { return IncomingByValArgs.size(); }
+
enum class PushPopKind { None = 0, StdExtZcmp, VendorXqccmp };
PushPopKind getPushPopKind(const MachineFunction &MF) const;
diff --git a/llvm/test/CodeGen/RISCV/musttail.ll b/llvm/test/CodeGen/RISCV/musttail.ll
index 32f9ac7fef470..4765fe7a4f233 100644
--- a/llvm/test/CodeGen/RISCV/musttail.ll
+++ b/llvm/test/CodeGen/RISCV/musttail.ll
@@ -393,3 +393,179 @@ entry:
musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
+
+%twenty_bytes = type { [5 x i32] }
+declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
+
+; Functions with byval parameters can be tail-called, because the argument is
+; passed to the callee in the same way the caller received it.
+define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
+; RV32-LABEL: large_caller:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: tail large_callee
+;
+; RV64-LABEL: large_caller:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: tail large_callee
+entry:
+ musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
+ ret void
+}
+
+; As above, but with some inline asm to test that the argument in r4 is
+; re-loaded before the call.
+define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind {
+; RV32-LABEL: large_caller_check_regs:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: #APP
+; RV32-NEXT: #NO_APP
+; RV32-NEXT: tail large_callee
+;
+; RV64-LABEL: large_caller_check_regs:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: #APP
+; RV64-NEXT: #NO_APP
+; RV64-NEXT: tail large_callee
+entry:
+ tail call void asm sideeffect "", "~{r4}"()
+ musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
+ ret void
+}
+
+; The IR for this one looks dodgy, because it has an alloca passed to a
+; musttail function, but it is passed as a byval argument, so will be copied
+; into the stack space allocated by @large_caller_new_value's caller, so is
+; valid.
+define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind {
+; RV32-LABEL: large_caller_new_value:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: li a2, 2
+; RV32-NEXT: li a3, 3
+; RV32-NEXT: li a4, 4
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a2, 20(sp)
+; RV32-NEXT: sw a3, 24(sp)
+; RV32-NEXT: sw a4, 28(sp)
+; RV32-NEXT: sw a4, 16(a0)
+; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
+; RV32-NEXT: sw a2, 8(a0)
+; RV32-NEXT: sw a3, 12(a0)
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: tail large_callee
+;
+; RV64-LABEL: large_caller_new_value:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: li a2, 2
+; RV64-NEXT: li a3, 3
+; RV64-NEXT: li a4, 4
+; RV64-NEXT: sw zero, 12(sp)
+; RV64-NEXT: sw a1, 16(sp)
+; RV64-NEXT: sw a2, 20(sp)
+; RV64-NEXT: sw a3, 24(sp)
+; RV64-NEXT: sw a4, 28(sp)
+; RV64-NEXT: sw a4, 16(a0)
+; RV64-NEXT: sw zero, 0(a0)
+; RV64-NEXT: sw a1, 4(a0)
+; RV64-NEXT: sw a2, 8(a0)
+; RV64-NEXT: sw a3, 12(a0)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: tail large_callee
+entry:
+ %y = alloca %twenty_bytes, align 4
+ store i32 0, ptr %y, align 4
+ %0 = getelementptr inbounds i8, ptr %y, i32 4
+ store i32 1, ptr %0, align 4
+ %1 = getelementptr inbounds i8, ptr %y, i32 8
+ store i32 2, ptr %1, align 4
+ %2 = getelementptr inbounds i8, ptr %y, i32 12
+ store i32 3, ptr %2, align 4
+ %3 = getelementptr inbounds i8, ptr %y, i32 16
+ store i32 4, ptr %3, align 4
+ musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y)
+ ret void
+}
+
+declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4)
+define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
+; RV32-LABEL: swap_byvals:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: tail two_byvals_callee
+;
+; RV64-LABEL: swap_byvals:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: tail two_byvals_callee
+entry:
+ musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
+ ret void
+}
+
+; A forwarded byval arg, but in a different argument register, so it needs to
+; be moved between registers first. This can't be musttail because of the
+; different signatures, but is still tail-called as an optimisation.
+declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
+define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
+; RV32-LABEL: shift_byval:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: tail shift_byval_callee
+;
+; RV64-LABEL: shift_byval:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: tail shift_byval_callee
+entry:
+ tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
+ ret void
+}
+
+; A global object passed to a byval argument, so it must be copied, but doesn't
+; need a stack temporary.
+@large_global = external global %twenty_bytes
+define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
+; RV32-LABEL: large_caller_from_global:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(large_global)
+; RV32-NEXT: addi a1, a1, %lo(large_global)
+; RV32-NEXT: lw a2, 16(a1)
+; RV32-NEXT: sw a2, 16(a0)
+; RV32-NEXT: lw a2, 12(a1)
+; RV32-NEXT: sw a2, 12(a0)
+; RV32-NEXT: lw a2, 8(a1)
+; RV32-NEXT: sw a2, 8(a0)
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: sw a2, 4(a0)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: tail large_callee
+;
+; RV64-LABEL: large_caller_from_global:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lui a1, %hi(large_global)
+; RV64-NEXT: addi a1, a1, %lo(large_global)
+; RV64-NEXT: lw a2, 16(a1)
+; RV64-NEXT: sw a2, 16(a0)
+; RV64-NEXT: lw a2, 12(a1)
+; RV64-NEXT: sw a2, 12(a0)
+; RV64-NEXT: lw a2, 8(a1)
+; RV64-NEXT: sw a2, 8(a0)
+; RV64-NEXT: lw a2, 4(a1)
+; RV64-NEXT: sw a2, 4(a0)
+; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: sw a1, 0(a0)
+; RV64-NEXT: tail large_callee
+entry:
+ musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 8f63e320655c9..fa68006059fdb 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -247,24 +247,20 @@ declare i32 @callee_indirect_args(fp128 %a)
define void @caller_indirect_args() nounwind {
; CHECK-LABEL: caller_indirect_args:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: lui a1, 262128
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: sw zero, 0(sp)
; CHECK-NEXT: sw zero, 4(sp)
; CHECK-NEXT: sw zero, 8(sp)
; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: call callee_indirect_args
-; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
-; CHECK-NEXT: ret
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: tail callee_indirect_args
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
-; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
-; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0)
@@ -274,10 +270,8 @@ define void @caller_indirect_args() nounwind {
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp)
-; CHECK-LARGE-ZICFILP-NEXT: jalr t2
-; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
-; CHECK-LARGE-ZICFILP-NEXT: ret
+; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
+; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
%call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
ret void
@@ -400,24 +394,30 @@ define i32 @caller_byval() nounwind {
; CHECK-LABEL: caller_byval:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: lw a1, 12(sp)
-; CHECK-NEXT: addi a0, sp, 8
-; CHECK-NEXT: sw a1, 8(sp)
+; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 8(sp)
+; CHECK-NEXT: sw a0, 4(sp)
+; CHECK-NEXT: addi a0, sp, 4
+; CHECK-NEXT: call callee_byval
+; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: tail callee_byval
+; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
-; CHECK-LARGE-ZICFILP-NEXT: lw a1, 12(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT: lw a0, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw a0, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi12:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI10_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
-; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 8
-; CHECK-LARGE-ZICFILP-NEXT: sw a1, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 4
+; CHECK-LARGE-ZICFILP-NEXT: jalr t2
+; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
-; CHECK-LARGE-ZICFILP-NEXT: jr t2
+; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
%a = alloca ptr
%r = tail call i32 @callee_byval(ptr byval(ptr) %a)
From ef13b0ec8ab7a75d8d8fcaae6658672c8279a0d9 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Wed, 3 Dec 2025 20:36:11 +0100
Subject: [PATCH 5/5] riscv: update 'failed to perform tail call elimination'
example
It is kind of trivial now, because musttail is accepted for more
signatures.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
llvm/test/CodeGen/RISCV/musttail-call.ll | 9 +++++----
2 files changed, 6 insertions(+), 5 deletions(-)
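For reference, divideCeil comes from llvm/include/llvm/Support/MathExtras.h and is just a clearer spelling of the manual round-up it replaces; both forms compute the value type's size in bytes:

  #include "llvm/Support/MathExtras.h"
  // divideCeil(Bits, 8) == (Bits + 7) / 8 for the sizes that occur here
  unsigned OpSize = llvm::divideCeil(VA.getValVT().getSizeInBits(), 8);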
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 52cc3f613454a..fc4f3e12ac6ad 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23900,7 +23900,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
if (IsTailCall) {
- unsigned OpSize = (VA.getValVT().getSizeInBits() + 7) / 8;
+ unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
diff --git a/llvm/test/CodeGen/RISCV/musttail-call.ll b/llvm/test/CodeGen/RISCV/musttail-call.ll
index f6ec5307b8bad..a3ac3560378db 100644
--- a/llvm/test/CodeGen/RISCV/musttail-call.ll
+++ b/llvm/test/CodeGen/RISCV/musttail-call.ll
@@ -9,12 +9,13 @@
; RUN: not --crash llc -mtriple riscv64-unknown-elf -o - %s \
; RUN: 2>&1 | FileCheck %s
-%struct.A = type { i32 }
+declare void @callee_musttail()
-declare void @callee_musttail(ptr sret(%struct.A) %a)
-define void @caller_musttail(ptr sret(%struct.A) %a) {
+define void @caller_musttail() #0 {
; CHECK: LLVM ERROR: failed to perform tail call elimination on a call site marked musttail
entry:
- musttail call void @callee_musttail(ptr sret(%struct.A) %a)
+ musttail call void @callee_musttail()
ret void
}
+
+attributes #0 = { "interrupt"="machine" }