[llvm] Enhance RISCV machine outlining to support a tailcall strategy. (PR #117526)
Owen Anderson via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 23:43:27 PST 2024
https://github.com/resistor created https://github.com/llvm/llvm-project/pull/117526
This is modeled on the equivalent path in the AArch64 backend.
Whenever the outlining candidate ends in a terminator, we can use
a tail call to reach it, removing the need to use a link register
or to insert a return instruction in the outlined function.
This improves code size in a size-optimized build of an internal
benchmark by approximately 3%.
From 6b66dce392a96c9a40010ce47c872f50e1757dcd Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor at mac.com>
Date: Mon, 25 Nov 2024 20:39:05 +1300
Subject: [PATCH] Enhance RISCV machine outlining to support a tailcall
strategy.
This is modeled on the equivalent path in the AArch64 backend.
Whenever the outlining candidate ends in a terminator, we can use
a tail call to reach it, removing the need to use a link register
or to insert a return instruction in the outlined function.
This improves code size in a size-optimized build of an internal
benchmark by approximately 3%.
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 107 +++++++++++++-----
.../CodeGen/RISCV/machine-outliner-cfi.mir | 20 ++--
.../machine-outliner-leaf-descendants.ll | 13 ++-
.../RISCV/machine-outliner-patchable.ll | 16 +--
.../RISCV/machine-outliner-position.mir | 18 +--
.../RISCV/machine-outliner-tailcall.ll | 57 ++++++++++
llvm/test/CodeGen/RISCV/machineoutliner.mir | 39 ++-----
7 files changed, 175 insertions(+), 95 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 933e776da47404..33d1d47a89d65f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2925,9 +2925,42 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
}
-// Enum values indicating how an outlined call should be constructed.
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerCallViaX5 implies that the function should be called
+/// using X5 as an alternative link register.
+///
+/// That is,
+///
+///   I1     Materialize addr in X5     OUTLINED_FUNCTION:
+///   I2 --> JAL X5                     I1
+///   I3                                I2
+///                                     I3
+///                                     RET X5
+///
+/// * Call construction overhead: 2 insns
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+///   I1                                OUTLINED_FUNCTION:
+///   I2 --> TAIL OUTLINED_FUNCTION     I1
+///   RET                               I2
+///                                     RET
+///
+/// * Call construction overhead: 2 insns
+/// * Frame construction overhead: 0 (Return included in sequence)
+/// * Requires stack fixups? No
+///
enum MachineOutlinerConstructionID {
- MachineOutlinerDefault
+ MachineOutlinerCallViaX5,
+ MachineOutlinerTailCall
};
bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
@@ -2941,14 +2974,33 @@ RISCVInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
unsigned MinRepeats) const {
- // First we need to filter out candidates where the X5 register (IE t0) can't
- // be used to setup the function call.
- auto CannotInsertCall = [](outliner::Candidate &C) {
- const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
- return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
- };
+ // If the last instruction in any candidate is a terminator, then we should
+ // tail call all of the candidates.
+ bool IsTailCall = RepeatedSequenceLocs[0].back().isTerminator();
+
+ if (!IsTailCall) {
+ // Filter out candidates where the X5 register (i.e. t0) can't
+ // be used to set up the function call.
+ auto CannotInsertCall = [](outliner::Candidate &C) {
+ const TargetRegisterInfo *TRI =
+ C.getMF()->getSubtarget().getRegisterInfo();
+ if (!C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI))
+ return true;
- llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
+ // Don't allow modifying the X5 register which we use for return addresses
+ // for these outlined functions.
+ for (const auto &MI : C) {
+ // FIXME: Why is this case not handled by isAvailableAcrossAndOutOfSeq
+ // above?
+ if (MI.modifiesRegister(RISCV::X5, TRI))
+ return true;
+ }
+
+ return false;
+ };
+
+ llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
+ }
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < MinRepeats)
@@ -2961,8 +3013,12 @@ RISCVInstrInfo::getOutliningCandidateInfo(
// call t0, function = 8 bytes.
unsigned CallOverhead = 8;
+
+ MachineOutlinerConstructionID OutlinerType =
+ IsTailCall ? MachineOutlinerTailCall : MachineOutlinerCallViaX5;
+
for (auto &C : RepeatedSequenceLocs)
- C.setCallInfo(MachineOutlinerDefault, CallOverhead);
+ C.setCallInfo(OutlinerType, CallOverhead);
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
unsigned FrameOverhead = 4;
@@ -2972,9 +3028,12 @@ RISCVInstrInfo::getOutliningCandidateInfo(
.hasStdExtCOrZca())
FrameOverhead = 2;
+ // There is no overhead in the frame when doing a tail call.
+ if (IsTailCall)
+ FrameOverhead = 0;
+
return std::make_unique<outliner::OutlinedFunction>(
- RepeatedSequenceLocs, SequenceSize, FrameOverhead,
- MachineOutlinerDefault);
+ RepeatedSequenceLocs, SequenceSize, FrameOverhead, OutlinerType);
}
outliner::InstrType
@@ -2982,9 +3041,6 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
MachineBasicBlock::iterator &MBBI,
unsigned Flags) const {
MachineInstr &MI = *MBBI;
- MachineBasicBlock *MBB = MI.getParent();
- const TargetRegisterInfo *TRI =
- MBB->getParent()->getSubtarget().getRegisterInfo();
const auto &F = MI.getMF()->getFunction();
// We can manually strip out CFI instructions later.
@@ -2995,17 +3051,6 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
: outliner::InstrType::Invisible;
- // We need support for tail calls to outlined functions before return
- // statements can be allowed.
- if (MI.isReturn())
- return outliner::InstrType::Illegal;
-
- // Don't allow modifying the X5 register which we use for return addresses for
- // these outlined functions.
- if (MI.modifiesRegister(RISCV::X5, TRI) ||
- MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
- return outliner::InstrType::Illegal;
-
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands()) {
@@ -3041,6 +3086,9 @@ void RISCVInstrInfo::buildOutlinedFrame(
MBB.addLiveIn(RISCV::X5);
+ if (OF.FrameConstructionID == MachineOutlinerTailCall)
+ return;
+
// Add in a return instruction to the end of the outlined frame.
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
.addReg(RISCV::X0, RegState::Define)
@@ -3052,6 +3100,13 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, outliner::Candidate &C) const {
+ if (C.CallConstructionID == MachineOutlinerTailCall) {
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
+ .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
+ RISCVII::MO_CALL));
+ return It;
+ }
+
// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
index 6ecca6a1b18ef8..3aca71643ecd72 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
@@ -22,13 +22,11 @@ body: |
; RV32I-MO-LABEL: name: func1
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func1
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
@@ -49,13 +47,11 @@ body: |
; RV32I-MO-LABEL: name: func2
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func2
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
@@ -76,13 +72,11 @@ body: |
; RV32I-MO-LABEL: name: func3
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func3
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, -12
$x11 = ORI $x11, 1023
@@ -103,4 +97,4 @@ body: |
# OUTLINED-NEXT: $x12 = ADDI $x10, 17
# OUTLINED-NEXT: $x11 = AND $x12, $x11
# OUTLINED-NEXT: $x10 = SUB $x10, $x11
-# OUTLINED-NEXT: $x0 = JALR $x5, 0
+# OUTLINED-NEXT: PseudoRET
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
index 8fab0aa9b6a76c..981661466120f4 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
@@ -94,7 +94,8 @@ define i32 @_Z2f6v() minsize {
; CHECK-BASELINE-NEXT: li a3, 0x4
; CHECK-BASELINE-NEXT: li a4, 0x5
; CHECK-BASELINE-NEXT: li a5, 0x6
-; CHECK-BASELINE-NEXT: jr t0
+; CHECK-BASELINE-NEXT: auipc t1, 0x0
+; CHECK-BASELINE-NEXT: jr t1 <OUTLINED_FUNCTION_0+0x18>
; CHECK-BASELINE: <OUTLINED_FUNCTION_1>:
; CHECK-BASELINE-NEXT: li a0, 0x1
@@ -102,8 +103,9 @@ define i32 @_Z2f6v() minsize {
; CHECK-BASELINE-NEXT: li a2, 0x3
; CHECK-BASELINE-NEXT: li a3, 0x4
; CHECK-BASELINE-NEXT: li a4, 0x5
-; CHECK-BASELINE-NEXT: li a5, 0x7
-; CHECK-BASELINE-NEXT: jr t0
+; CHECK-BASELINE-NEXT: li a5, 0x8
+; CHECK-BASELINE-NEXT: auipc t1, 0x0
+; CHECK-BASELINE-NEXT: jr t1 <OUTLINED_FUNCTION_1+0x18>
; CHECK-BASELINE: <OUTLINED_FUNCTION_2>:
; CHECK-BASELINE-NEXT: li a0, 0x1
@@ -111,8 +113,9 @@ define i32 @_Z2f6v() minsize {
; CHECK-BASELINE-NEXT: li a2, 0x3
; CHECK-BASELINE-NEXT: li a3, 0x4
; CHECK-BASELINE-NEXT: li a4, 0x5
-; CHECK-BASELINE-NEXT: li a5, 0x8
-; CHECK-BASELINE-NEXT: jr t0
+; CHECK-BASELINE-NEXT: li a5, 0x7
+; CHECK-BASELINE-NEXT: auipc t1, 0x0
+; CHECK-BASELINE-NEXT: jr t1 <OUTLINED_FUNCTION_2+0x18>
; CHECK-LEAF-DESCENDANTS: <OUTLINED_FUNCTION_0>:
; CHECK-LEAF-DESCENDANTS-NEXT: li a0, 0x1
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
index 4ef3abd241577f..f6c293f3caf118 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
@@ -10,8 +10,8 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" {
; CHECK-LABEL: fentry0:
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: # FEntry call
-; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK: .LBB0_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_0
entry:
br i1 %a, label %if.then, label %if.end
if.then:
@@ -26,8 +26,8 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" {
; CHECK-LABEL: fentry1:
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: # FEntry call
-; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK: .LBB1_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_0
entry:
br i1 %a, label %if.then, label %if.end
if.then:
@@ -46,8 +46,8 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: nop
; CHECK-NEXT: nop
-; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK: .LBB2_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_0
entry:
br i1 %a, label %if.then, label %if.end
if.then:
@@ -64,8 +64,8 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: nop
; CHECK-NEXT: nop
-; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK: .LBB3_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_0
entry:
br i1 %a, label %if.then, label %if.end
if.then:
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
index 715e212eecabb3..b384c5bca0a6f7 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
@@ -25,15 +25,13 @@ body: |
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
; RV32I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func1
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
; RV64I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
EH_LABEL <mcsymbol .Ltmp0>
$x11 = ORI $x11, 1023
@@ -53,15 +51,13 @@ body: |
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
; RV32I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func2
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
; RV64I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
GC_LABEL <mcsymbol .Ltmp1>
$x11 = ORI $x11, 1023
@@ -81,15 +77,13 @@ body: |
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
; RV32I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func3
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
; RV64I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
ANNOTATION_LABEL <mcsymbol .Ltmp2>
$x11 = ORI $x11, 1023
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll b/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll
new file mode 100644
index 00000000000000..1648a302b723e8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=riscv32 < %s | FileCheck %s
+source_filename = "/app/example.cpp"
+target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
+target triple = "riscv32-unknown-linux-gnu"
+
+; Function Attrs: minsize mustprogress optsize uwtable
+define dso_local noundef i32 @_Z3fooiiii(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 {
+ %5 = mul nsw i32 %1, %0
+ %6 = mul nsw i32 %3, %2
+ %7 = add nsw i32 %6, %5
+ %8 = tail call noundef i32 @_Z3bari(i32 noundef %7) #2
+ ret i32 %8
+}
+
+; Function Attrs: minsize optsize
+declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #1
+
+; Function Attrs: minsize mustprogress optsize uwtable
+define dso_local noundef i32 @_Z3foziiii(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 {
+ %5 = mul nsw i32 %1, %0
+ %6 = mul nsw i32 %3, %2
+ %7 = add nsw i32 %6, %5
+ %8 = tail call noundef i32 @_Z3bari(i32 noundef %7) #2
+ ret i32 %8
+}
+
+attributes #0 = { minsize mustprogress optsize uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv32" "target-features"="+32bit,+a,+c,+d,+f,+m,+relax,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
+attributes #1 = { minsize optsize "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv32" "target-features"="+32bit,+a,+c,+d,+f,+m,+relax,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
+attributes #2 = { minsize optsize }
+
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"target-abi", !"ilp32d"}
+!2 = !{i32 6, !"riscv-isa", !3}
+!3 = !{!"rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zmmul1p0_zaamo1p0_zalrsc1p0"}
+!4 = !{i32 8, !"PIC Level", i32 2}
+!5 = !{i32 7, !"PIE Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 2}
+!7 = !{i32 8, !"SmallDataLimit", i32 0}
+!8 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git a5af6214dd0e9d53c66dc06bcd23540b05c70120)"}
+; CHECK-LABEL: _Z3fooiiii:
+; CHECK: # %bb.0:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_0
+;
+; CHECK-LABEL: _Z3foziiii:
+; CHECK: # %bb.0:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_0
+;
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mul a0, a1, a0
+; CHECK-NEXT: mul a1, a3, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: tail _Z3bari
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner.mir b/llvm/test/CodeGen/RISCV/machineoutliner.mir
index 0221257354fcfa..678dbc48dc5812 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner.mir
@@ -17,9 +17,6 @@
; Should not outline functions with named linker sections
define i32 @dont_outline_1(i32 %a, i32 %b) section "named" { ret i32 0 }
- ; Cannot outline if the X5 (t0) register is not free
- define i32 @dont_outline_2(i32 %a, i32 %b) { ret i32 0 }
-
...
---
name: outline_0
@@ -29,10 +26,10 @@ body: |
bb.0:
liveins: $x10, $x11
; RV32I-MO-LABEL: name: outline_0
- ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
;
; RV64I-MO-LABEL: name: outline_0
- ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
$x11 = ORI $x11, 1023
$x12 = ADDI $x10, 17
$x11 = AND $x12, $x11
@@ -48,10 +45,10 @@ body: |
bb.0:
liveins: $x10, $x11
; RV32I-MO-LABEL: name: outline_1
- ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
;
; RV64I-MO-LABEL: name: outline_1
- ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
$x11 = ORI $x11, 1023
$x12 = ADDI $x10, 17
$x11 = AND $x12, $x11
@@ -67,10 +64,10 @@ body: |
bb.0:
liveins: $x10, $x11
; RV32I-MO-LABEL: name: outline_2
- ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
;
; RV64I-MO-LABEL: name: outline_2
- ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
$x11 = ORI $x11, 1023
$x12 = ADDI $x10, 17
$x11 = AND $x12, $x11
@@ -86,10 +83,10 @@ body: |
bb.0:
liveins: $x10, $x11
; RV32I-MO-LABEL: name: dont_outline_0
- ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV32I-MO-NOT: @OUTLINED_FUNCTION_0
;
; RV64I-MO-LABEL: name: dont_outline_0
- ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+ ; RV64I-MO-NOT: @OUTLINED_FUNCTION_0
$x11 = ORI $x11, 1023
$x12 = ADDI $x10, 17
$x11 = AND $x12, $x11
@@ -115,26 +112,6 @@ body: |
$x10 = SUB $x10, $x11
PseudoRET implicit $x10
-...
----
-name: dont_outline_2
-tracksRegLiveness: true
-isOutlined: false
-body: |
- bb.0:
- liveins: $x10, $x11, $x5
- ; RV32I-MO-LABEL: name: dont_outline_2
- ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
- ;
- ; RV64I-MO-LABEL: name: dont_outline_2
- ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
- $x11 = ORI $x11, 1023
- $x12 = ADDI $x10, 17
- $x11 = AND $x12, $x11
- $x10 = SUB $x10, $x11
- $x10 = ADD $x10, $x5
- PseudoRET implicit $x10
-
...
# CHECK-LABEL: name: OUTLINED_FUNCTION_0
More information about the llvm-commits
mailing list