[llvm] Enhance RISCV machine outlining to support a tailcall strategy. (PR #117526)

Owen Anderson via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 24 23:43:27 PST 2024


https://github.com/resistor created https://github.com/llvm/llvm-project/pull/117526

This is modeled on the equivalent path in the AArch64 backend.
Whenever the outlining candidate ends in a terminator, we can use
a tail call to reach it, removing the need to use a link register
or to insert a return instruction in the outlined function.

This improves code size in a size-optimized build of an internal
benchmark by approximately 3%.


>From 6b66dce392a96c9a40010ce47c872f50e1757dcd Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor at mac.com>
Date: Mon, 25 Nov 2024 20:39:05 +1300
Subject: [PATCH] Enhance RISCV machine outlining to support a tailcall
 strategy.

This is modeled on the equivalent path in the AArch64 backend.
Whenever the outlining candidate ends in a terminator, we can use
a tail call to reach it, removing the need to use a link register
or to insert a return instruction in the outlined function.

This improves code size in a size-optimized build of an internal
benchmark by approximately 3%.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 107 +++++++++++++-----
 .../CodeGen/RISCV/machine-outliner-cfi.mir    |  20 ++--
 .../machine-outliner-leaf-descendants.ll      |  13 ++-
 .../RISCV/machine-outliner-patchable.ll       |  16 +--
 .../RISCV/machine-outliner-position.mir       |  18 +--
 .../RISCV/machine-outliner-tailcall.ll        |  57 ++++++++++
 llvm/test/CodeGen/RISCV/machineoutliner.mir   |  39 ++-----
 7 files changed, 175 insertions(+), 95 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 933e776da47404..33d1d47a89d65f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2925,9 +2925,42 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
   return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
 }
 
-// Enum values indicating how an outlined call should be constructed.
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerCallViaX5 implies that the function should be called with
+/// using X5 as an alternative link register.
+///
+/// That is,
+///
+/// I1     Materialize addr in X5     OUTLINED_FUNCTION:
+/// I2 --> JAL X5                     I1
+/// I3                                I2
+///                                   I3
+///                                   RET X5
+///
+/// * Call construction overhead: 2 insns
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1                             OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION     I1
+/// RET                            I2
+///                                RET
+///
+/// * Call construction overhead: 2 insns
+/// * Frame construction overhead: 0 (Return included in sequence)
+/// * Requires stack fixups? No
+///
 enum MachineOutlinerConstructionID {
-  MachineOutlinerDefault
+  MachineOutlinerCallViaX5,
+  MachineOutlinerTailCall
 };
 
 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
@@ -2941,14 +2974,33 @@ RISCVInstrInfo::getOutliningCandidateInfo(
     std::vector<outliner::Candidate> &RepeatedSequenceLocs,
     unsigned MinRepeats) const {
 
-  // First we need to filter out candidates where the X5 register (IE t0) can't
-  // be used to setup the function call.
-  auto CannotInsertCall = [](outliner::Candidate &C) {
-    const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
-    return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
-  };
+  // If the last instruction in any candidate is a terminator, then we should
+  // tail call all of the candidates.
+  bool IsTailCall = RepeatedSequenceLocs[0].back().isTerminator();
+
+  if (!IsTailCall) {
+    // Filter out candidates where the X5 register (IE t0) can't
+    // be used to setup the function call.
+    auto CannotInsertCall = [](outliner::Candidate &C) {
+      const TargetRegisterInfo *TRI =
+          C.getMF()->getSubtarget().getRegisterInfo();
+      if (!C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI))
+        return true;
 
-  llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
+      // Don't allow modifying the X5 register which we use for return addresses
+      // for these outlined functions.
+      for (const auto &MI : C) {
+        // FIXME: Why is this case not handled by isAvailableAcrossAndOutOfSeq
+        // above?
+        if (MI.modifiesRegister(RISCV::X5, TRI))
+          return true;
+      }
+
+      return false;
+    };
+
+    llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
+  }
 
   // If the sequence doesn't have enough candidates left, then we're done.
   if (RepeatedSequenceLocs.size() < MinRepeats)
@@ -2961,8 +3013,12 @@ RISCVInstrInfo::getOutliningCandidateInfo(
 
   // call t0, function = 8 bytes.
   unsigned CallOverhead = 8;
+
+  MachineOutlinerConstructionID OutlinerType =
+      IsTailCall ? MachineOutlinerTailCall : MachineOutlinerCallViaX5;
+
   for (auto &C : RepeatedSequenceLocs)
-    C.setCallInfo(MachineOutlinerDefault, CallOverhead);
+    C.setCallInfo(OutlinerType, CallOverhead);
 
   // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
   unsigned FrameOverhead = 4;
@@ -2972,9 +3028,12 @@ RISCVInstrInfo::getOutliningCandidateInfo(
           .hasStdExtCOrZca())
     FrameOverhead = 2;
 
+  // There is no overhead in the frame when doing a tail call.
+  if (IsTailCall)
+    FrameOverhead = 0;
+
   return std::make_unique<outliner::OutlinedFunction>(
-      RepeatedSequenceLocs, SequenceSize, FrameOverhead,
-      MachineOutlinerDefault);
+      RepeatedSequenceLocs, SequenceSize, FrameOverhead, OutlinerType);
 }
 
 outliner::InstrType
@@ -2982,9 +3041,6 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
                                      MachineBasicBlock::iterator &MBBI,
                                      unsigned Flags) const {
   MachineInstr &MI = *MBBI;
-  MachineBasicBlock *MBB = MI.getParent();
-  const TargetRegisterInfo *TRI =
-      MBB->getParent()->getSubtarget().getRegisterInfo();
   const auto &F = MI.getMF()->getFunction();
 
   // We can manually strip out CFI instructions later.
@@ -2995,17 +3051,6 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
     return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
                                      : outliner::InstrType::Invisible;
 
-  // We need support for tail calls to outlined functions before return
-  // statements can be allowed.
-  if (MI.isReturn())
-    return outliner::InstrType::Illegal;
-
-  // Don't allow modifying the X5 register which we use for return addresses for
-  // these outlined functions.
-  if (MI.modifiesRegister(RISCV::X5, TRI) ||
-      MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
-    return outliner::InstrType::Illegal;
-
   // Make sure the operands don't reference something unsafe.
   for (const auto &MO : MI.operands()) {
 
@@ -3041,6 +3086,9 @@ void RISCVInstrInfo::buildOutlinedFrame(
 
   MBB.addLiveIn(RISCV::X5);
 
+  if (OF.FrameConstructionID == MachineOutlinerTailCall)
+    return;
+
   // Add in a return instruction to the end of the outlined frame.
   MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
       .addReg(RISCV::X0, RegState::Define)
@@ -3052,6 +3100,13 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
     MachineFunction &MF, outliner::Candidate &C) const {
 
+  if (C.CallConstructionID == MachineOutlinerTailCall) {
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
+                            .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
+                                              RISCVII::MO_CALL));
+    return It;
+  }
+
   // Add in a call instruction to the outlined function at the given location.
   It = MBB.insert(It,
                   BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
index 6ecca6a1b18ef8..3aca71643ecd72 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
@@ -22,13 +22,11 @@ body:             |
     ; RV32I-MO-LABEL: name: func1
     ; RV32I-MO: liveins: $x10, $x11
     ; RV32I-MO-NEXT: {{  $}}
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func1
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     CFI_INSTRUCTION offset $x1, 0
     $x11 = ORI $x11, 1023
@@ -49,13 +47,11 @@ body:             |
     ; RV32I-MO-LABEL: name: func2
     ; RV32I-MO: liveins: $x10, $x11
     ; RV32I-MO-NEXT: {{  $}}
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func2
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     CFI_INSTRUCTION offset $x1, 0
     $x11 = ORI $x11, 1023
@@ -76,13 +72,11 @@ body:             |
     ; RV32I-MO-LABEL: name: func3
     ; RV32I-MO: liveins: $x10, $x11
     ; RV32I-MO-NEXT: {{  $}}
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func3
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     CFI_INSTRUCTION offset $x1, -12
     $x11 = ORI $x11, 1023
@@ -103,4 +97,4 @@ body:             |
 # OUTLINED-NEXT: $x12 = ADDI $x10, 17
 # OUTLINED-NEXT: $x11 = AND $x12, $x11
 # OUTLINED-NEXT: $x10 = SUB $x10, $x11
-# OUTLINED-NEXT: $x0 = JALR $x5, 0
+# OUTLINED-NEXT: PseudoRET
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
index 8fab0aa9b6a76c..981661466120f4 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
@@ -94,7 +94,8 @@ define i32 @_Z2f6v() minsize {
 ; CHECK-BASELINE-NEXT:	li	a3, 0x4
 ; CHECK-BASELINE-NEXT:	li	a4, 0x5
 ; CHECK-BASELINE-NEXT:	li	a5, 0x6
-; CHECK-BASELINE-NEXT:	jr	t0
+; CHECK-BASELINE-NEXT:	auipc t1, 0x0
+; CHECK-BASELINE-NEXT:	jr t1 <OUTLINED_FUNCTION_0+0x18>
 
 ; CHECK-BASELINE: <OUTLINED_FUNCTION_1>:
 ; CHECK-BASELINE-NEXT:	li	a0, 0x1
@@ -102,8 +103,9 @@ define i32 @_Z2f6v() minsize {
 ; CHECK-BASELINE-NEXT:	li	a2, 0x3
 ; CHECK-BASELINE-NEXT:	li	a3, 0x4
 ; CHECK-BASELINE-NEXT:	li	a4, 0x5
-; CHECK-BASELINE-NEXT:	li	a5, 0x7
-; CHECK-BASELINE-NEXT:	jr	t0
+; CHECK-BASELINE-NEXT:	li	a5, 0x8
+; CHECK-BASELINE-NEXT:	auipc t1, 0x0
+; CHECK-BASELINE-NEXT:	jr t1 <OUTLINED_FUNCTION_1+0x18>
 
 ; CHECK-BASELINE: <OUTLINED_FUNCTION_2>:
 ; CHECK-BASELINE-NEXT:	li	a0, 0x1
@@ -111,8 +113,9 @@ define i32 @_Z2f6v() minsize {
 ; CHECK-BASELINE-NEXT:	li	a2, 0x3
 ; CHECK-BASELINE-NEXT:	li	a3, 0x4
 ; CHECK-BASELINE-NEXT:	li	a4, 0x5
-; CHECK-BASELINE-NEXT:	li	a5, 0x8
-; CHECK-BASELINE-NEXT:	jr	t0
+; CHECK-BASELINE-NEXT:	li	a5, 0x7
+; CHECK-BASELINE-NEXT:	auipc t1, 0x0
+; CHECK-BASELINE-NEXT:	jr t1 <OUTLINED_FUNCTION_2+0x18>
 
 ; CHECK-LEAF-DESCENDANTS: <OUTLINED_FUNCTION_0>:
 ; CHECK-LEAF-DESCENDANTS-NEXT:	li	a0, 0x1
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
index 4ef3abd241577f..f6c293f3caf118 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
@@ -10,8 +10,8 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" {
 ; CHECK-LABEL: fentry0:
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    # FEntry call
-; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK:       .LBB0_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_0
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
@@ -26,8 +26,8 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" {
 ; CHECK-LABEL: fentry1:
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    # FEntry call
-; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK:       .LBB1_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_0
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
@@ -46,8 +46,8 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    nop
-; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK:       .LBB2_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_0
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
@@ -64,8 +64,8 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    nop
-; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK:       .LBB3_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_0
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
index 715e212eecabb3..b384c5bca0a6f7 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
@@ -25,15 +25,13 @@ body:             |
     ; RV32I-MO-NEXT: {{  $}}
     ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV32I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func1
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
     ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV64I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     EH_LABEL <mcsymbol .Ltmp0>
     $x11 = ORI $x11, 1023
@@ -53,15 +51,13 @@ body:             |
     ; RV32I-MO-NEXT: {{  $}}
     ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV32I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func2
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
     ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV64I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     GC_LABEL <mcsymbol .Ltmp1>
     $x11 = ORI $x11, 1023
@@ -81,15 +77,13 @@ body:             |
     ; RV32I-MO-NEXT: {{  $}}
     ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV32I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func3
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
     ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV64I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     ANNOTATION_LABEL <mcsymbol .Ltmp2>
     $x11 = ORI $x11, 1023
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll b/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll
new file mode 100644
index 00000000000000..1648a302b723e8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=riscv32 < %s | FileCheck %s
+source_filename = "/app/example.cpp"
+target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
+target triple = "riscv32-unknown-linux-gnu"
+
+; Function Attrs: minsize mustprogress optsize uwtable
+define dso_local noundef i32 @_Z3fooiiii(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 {
+  %5 = mul nsw i32 %1, %0
+  %6 = mul nsw i32 %3, %2
+  %7 = add nsw i32 %6, %5
+  %8 = tail call noundef i32 @_Z3bari(i32 noundef %7) #2
+  ret i32 %8
+}
+
+; Function Attrs: minsize optsize
+declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #1
+
+; Function Attrs: minsize mustprogress optsize uwtable
+define dso_local noundef i32 @_Z3foziiii(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 {
+  %5 = mul nsw i32 %1, %0
+  %6 = mul nsw i32 %3, %2
+  %7 = add nsw i32 %6, %5
+  %8 = tail call noundef i32 @_Z3bari(i32 noundef %7) #2
+  ret i32 %8
+}
+
+attributes #0 = { minsize mustprogress optsize uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv32" "target-features"="+32bit,+a,+c,+d,+f,+m,+relax,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
+attributes #1 = { minsize optsize "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv32" "target-features"="+32bit,+a,+c,+d,+f,+m,+relax,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
+attributes #2 = { minsize optsize }
+
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"target-abi", !"ilp32d"}
+!2 = !{i32 6, !"riscv-isa", !3}
+!3 = !{!"rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zmmul1p0_zaamo1p0_zalrsc1p0"}
+!4 = !{i32 8, !"PIC Level", i32 2}
+!5 = !{i32 7, !"PIE Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 2}
+!7 = !{i32 8, !"SmallDataLimit", i32 0}
+!8 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git a5af6214dd0e9d53c66dc06bcd23540b05c70120)"}
+; CHECK-LABEL: _Z3fooiiii:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_0
+;
+; CHECK-LABEL: _Z3foziiii:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_0
+;
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mul a0, a1, a0
+; CHECK-NEXT:    mul a1, a3, a2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    tail _Z3bari
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner.mir b/llvm/test/CodeGen/RISCV/machineoutliner.mir
index 0221257354fcfa..678dbc48dc5812 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner.mir
@@ -17,9 +17,6 @@
   ; Should not outline functions with named linker sections
   define i32 @dont_outline_1(i32 %a, i32 %b) section "named" { ret i32 0 }
 
-  ; Cannot outline if the X5 (t0) register is not free
-  define i32 @dont_outline_2(i32 %a, i32 %b) { ret i32 0 }
-
 ...
 ---
 name:            outline_0
@@ -29,10 +26,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: outline_0
-    ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
     ;
     ; RV64I-MO-LABEL: name: outline_0
-    ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -48,10 +45,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: outline_1
-    ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
     ;
     ; RV64I-MO-LABEL: name: outline_1
-    ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -67,10 +64,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: outline_2
-    ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
     ;
     ; RV64I-MO-LABEL: name: outline_2
-    ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -86,10 +83,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: dont_outline_0
-    ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO-NOT: @OUTLINED_FUNCTION_0
     ;
     ; RV64I-MO-LABEL: name: dont_outline_0
-    ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO-NOT: @OUTLINED_FUNCTION_0
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -115,26 +112,6 @@ body:             |
     $x10 = SUB $x10, $x11
     PseudoRET implicit $x10
 
-...
----
-name:            dont_outline_2
-tracksRegLiveness: true
-isOutlined: false
-body:             |
-  bb.0:
-    liveins: $x10, $x11, $x5
-    ; RV32I-MO-LABEL: name: dont_outline_2
-    ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
-    ;
-    ; RV64I-MO-LABEL: name: dont_outline_2
-    ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
-    $x11 = ORI $x11, 1023
-    $x12 = ADDI $x10, 17
-    $x11 = AND $x12, $x11
-    $x10 = SUB $x10, $x11
-    $x10 = ADD $x10, $x5
-    PseudoRET implicit $x10
-
 ...
 
 # CHECK-LABEL: name: OUTLINED_FUNCTION_0



More information about the llvm-commits mailing list