[llvm] [RISCV] Implement tail call optimization in machine outliner (PR #115297)

Mark Goncharov via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 23:46:43 PST 2024


https://github.com/mga-sc updated https://github.com/llvm/llvm-project/pull/115297

>From dd8a3d3ba0d7e4b2225a0d824904713ac43dd565 Mon Sep 17 00:00:00 2001
From: Goncharov Mark <mark.goncharov at syntacore.com>
Date: Thu, 7 Nov 2024 07:37:35 +0000
Subject: [PATCH 1/3] [RISCV] Implement tail call optimization in machine
 outliner

Following up on issue #89822, this patch adds the ability
to use tail calls in the machine outliner pass.
It also enables outlining of patterns that use the X5 (T0) register.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 72 +++++++++++++++----
 .../CodeGen/RISCV/machine-outliner-call.ll    | 70 ++++++++++++++++++
 .../CodeGen/RISCV/machine-outliner-cfi.mir    | 22 +++---
 .../machine-outliner-leaf-descendants.ll      | 13 ++--
 .../RISCV/machine-outliner-patchable.ll       | 24 +++++--
 .../RISCV/machine-outliner-position.mir       | 21 +++---
 .../test/CodeGen/RISCV/machineoutliner-x5.mir | 58 +++++++++++++++
 llvm/test/CodeGen/RISCV/machineoutliner.mir   | 18 +++--
 8 files changed, 242 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/machine-outliner-call.ll
 create mode 100644 llvm/test/CodeGen/RISCV/machineoutliner-x5.mir

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 04bb964bfc48cf..f6425d47fd81bb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2929,6 +2929,7 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
 
 // Enum values indicating how an outlined call should be constructed.
 enum MachineOutlinerConstructionID {
+  MachineOutlinerTailCall,
   MachineOutlinerDefault
 };
 
@@ -2937,19 +2938,47 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
   return MF.getFunction().hasMinSize();
 }
 
+static bool IsCandidatePatchable(const MachineInstr &MI) {
+  const MachineBasicBlock *MBB = MI.getParent();
+  const MachineFunction *MF = MBB->getParent();
+  const Function &F = MF->getFunction();
+  return F.getFnAttribute("fentry-call").getValueAsBool() ||
+         F.hasFnAttribute("patchable-function-entry");
+}
+
+static bool CannotInsertTailCall(const MachineInstr &MI) {
+  if (MI.isTerminator())
+    return IsCandidatePatchable(MI);
+  return true;
+}
+
+static bool MIUseX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+  return MI.modifiesRegister(RISCV::X5, TRI) ||
+         MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5);
+}
+
 std::optional<std::unique_ptr<outliner::OutlinedFunction>>
 RISCVInstrInfo::getOutliningCandidateInfo(
     const MachineModuleInfo &MMI,
     std::vector<outliner::Candidate> &RepeatedSequenceLocs,
     unsigned MinRepeats) const {
 
-  // First we need to filter out candidates where the X5 register (IE t0) can't
-  // be used to setup the function call.
-  auto CannotInsertCall = [](outliner::Candidate &C) {
+  auto CandidateUseX5 = [](outliner::Candidate &C) {
     const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
+    for (const MachineInstr &MI : C)
+      if (MIUseX5(MI, TRI))
+        return true;
     return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
   };
 
+  auto CannotInsertCall = [CandidateUseX5](outliner::Candidate &C) {
+    if (!CandidateUseX5(C))
+      return false;
+    if (!CannotInsertTailCall(C.back()))
+      return false;
+    return true;
+  };
+
   llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
 
   // If the sequence doesn't have enough candidates left, then we're done.
@@ -2961,6 +2990,17 @@ RISCVInstrInfo::getOutliningCandidateInfo(
   for (auto &MI : RepeatedSequenceLocs[0])
     SequenceSize += getInstSizeInBytes(MI);
 
+  if (!CannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
+    // tail function = 8 bytes. Can't be compressed
+    for (auto &C : RepeatedSequenceLocs)
+      C.setCallInfo(MachineOutlinerTailCall, 8);
+
+    // Using tail call we move ret instrunction from caller to calee.
+    //   So, FrameOverhead for this is 0
+    return std::make_unique<outliner::OutlinedFunction>(
+        RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall);
+  }
+
   // call t0, function = 8 bytes.
   unsigned CallOverhead = 8;
   for (auto &C : RepeatedSequenceLocs)
@@ -2997,15 +3037,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
     return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
                                      : outliner::InstrType::Invisible;
 
-  // We need support for tail calls to outlined functions before return
-  // statements can be allowed.
-  if (MI.isReturn())
-    return outliner::InstrType::Illegal;
-
-  // Don't allow modifying the X5 register which we use for return addresses for
-  // these outlined functions.
-  if (MI.modifiesRegister(RISCV::X5, TRI) ||
-      MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
+  if (CannotInsertTailCall(MBB->back()) && MIUseX5(MI, TRI))
     return outliner::InstrType::Illegal;
 
   // Make sure the operands don't reference something unsafe.
@@ -3041,19 +3073,29 @@ void RISCVInstrInfo::buildOutlinedFrame(
     }
   }
 
+  if (OF.FrameConstructionID == MachineOutlinerTailCall)
+    return;
+
   MBB.addLiveIn(RISCV::X5);
 
   // Add in a return instruction to the end of the outlined frame.
   MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
-      .addReg(RISCV::X0, RegState::Define)
-      .addReg(RISCV::X5)
-      .addImm(0));
+                            .addReg(RISCV::X0, RegState::Define)
+                            .addReg(RISCV::X5)
+                            .addImm(0));
 }
 
 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
     MachineFunction &MF, outliner::Candidate &C) const {
 
+  if (C.CallConstructionID == MachineOutlinerTailCall) {
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
+                            .addGlobalAddress(M.getNamedValue(MF.getName()),
+                                              /*Offset=*/0, RISCVII::MO_CALL));
+    return It;
+  }
+
   // Add in a call instruction to the outlined function at the given location.
   It = MBB.insert(It,
                   BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-call.ll b/llvm/test/CodeGen/RISCV/machine-outliner-call.ll
new file mode 100644
index 00000000000000..b019cfe74864b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-call.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -verify-machineinstrs -enable-machine-outliner | FileCheck %s
+
+target triple = "riscv64-unknown-linux-gnu"
+
+declare void @foo(i32, i32, i32, i32) minsize
+
+define void @fentry0(i1 %a) nounwind {
+; CHECK-LABEL: fentry0:
+; CHECK:       # %bb.1:
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB0_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+entry:
+  br i1 %a, label %if.then, label %if.end
+if.then:
+  call void @foo(i32 1, i32 2, i32 3, i32 4)
+  br label %if.end
+if.end:
+  call void @foo(i32 5, i32 6, i32 7, i32 8)
+  ret void
+}
+
+define void @fentry1(i1 %a) nounwind {
+; CHECK-LABEL: fentry1:
+; CHECK:       # %bb.1:
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB1_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+entry:
+  br i1 %a, label %if.then, label %if.end
+if.then:
+  call void @foo(i32 1, i32 2, i32 3, i32 4)
+  br label %if.end
+if.end:
+  call void @foo(i32 5, i32 6, i32 7, i32 8)
+  ret void
+}
+
+define void @fentry2(i1 %a) nounwind {
+; CHECK-LABEL: fentry2:
+; CHECK:       # %bb.1:
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB2_2:
+; CHECK-NEXT:    tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+entry:
+  br i1 %a, label %if.then, label %if.end
+if.then:
+  call void @foo(i32 1, i32 2, i32 3, i32 4)
+  br label %if.end
+if.end:
+  call void @foo(i32 5, i32 6, i32 7, i32 8)
+  ret void
+}
+
+; CHECK:       OUTLINED_FUNCTION_[[BB2]]:
+; CHECK:       li      a0, 5
+; CHECK-NEXT:  li      a1, 6
+; CHECK-NEXT:  li      a2, 7
+; CHECK-NEXT:  li      a3, 8
+; CHECK-NEXT:  call foo
+
+; CHECK:       OUTLINED_FUNCTION_[[BB1]]:
+; CHECK:       li      a0, 1
+; CHECK-NEXT:  li      a1, 2
+; CHECK-NEXT:  li      a2, 3
+; CHECK-NEXT:  li      a3, 4
+; CHECK-NEXT:  jr      t0
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
index 6ecca6a1b18ef8..2acb1d43e01eaf 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
@@ -22,13 +22,11 @@ body:             |
     ; RV32I-MO-LABEL: name: func1
     ; RV32I-MO: liveins: $x10, $x11
     ; RV32I-MO-NEXT: {{  $}}
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func1
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     CFI_INSTRUCTION offset $x1, 0
     $x11 = ORI $x11, 1023
@@ -49,13 +47,11 @@ body:             |
     ; RV32I-MO-LABEL: name: func2
     ; RV32I-MO: liveins: $x10, $x11
     ; RV32I-MO-NEXT: {{  $}}
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func2
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     CFI_INSTRUCTION offset $x1, 0
     $x11 = ORI $x11, 1023
@@ -76,13 +72,11 @@ body:             |
     ; RV32I-MO-LABEL: name: func3
     ; RV32I-MO: liveins: $x10, $x11
     ; RV32I-MO-NEXT: {{  $}}
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ; RV64I-MO-LABEL: name: func3
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     CFI_INSTRUCTION offset $x1, -12
     $x11 = ORI $x11, 1023
@@ -96,11 +90,11 @@ body:             |
 
 
 # OUTLINED-LABEL: name: OUTLINED_FUNCTION_0
-# OUTLINED: liveins: $x11, $x10, $x5
+# OUTLINED: liveins: $x11, $x10
 # OUTLINED-NEXT: {{  $}}
 # OUTLINED-NEXT: $x10 = ORI $x10, 1023
 # OUTLINED-NEXT: $x11 = ORI $x11, 1023
 # OUTLINED-NEXT: $x12 = ADDI $x10, 17
 # OUTLINED-NEXT: $x11 = AND $x12, $x11
 # OUTLINED-NEXT: $x10 = SUB $x10, $x11
-# OUTLINED-NEXT: $x0 = JALR $x5, 0
+# OUTLINED-NEXT: PseudoRET
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
index 8fab0aa9b6a76c..0441361b117989 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
@@ -94,7 +94,8 @@ define i32 @_Z2f6v() minsize {
 ; CHECK-BASELINE-NEXT:	li	a3, 0x4
 ; CHECK-BASELINE-NEXT:	li	a4, 0x5
 ; CHECK-BASELINE-NEXT:	li	a5, 0x6
-; CHECK-BASELINE-NEXT:	jr	t0
+; CHECK-BASELINE-NEXT:	auipc	t1, 0x0
+; CHECK-BASELINE-NEXT:	jr	t1
 
 ; CHECK-BASELINE: <OUTLINED_FUNCTION_1>:
 ; CHECK-BASELINE-NEXT:	li	a0, 0x1
@@ -102,8 +103,9 @@ define i32 @_Z2f6v() minsize {
 ; CHECK-BASELINE-NEXT:	li	a2, 0x3
 ; CHECK-BASELINE-NEXT:	li	a3, 0x4
 ; CHECK-BASELINE-NEXT:	li	a4, 0x5
-; CHECK-BASELINE-NEXT:	li	a5, 0x7
-; CHECK-BASELINE-NEXT:	jr	t0
+; CHECK-BASELINE-NEXT:	li	a5, 0x8
+; CHECK-BASELINE-NEXT:	auipc	t1, 0x0
+; CHECK-BASELINE-NEXT:	jr	t1
 
 ; CHECK-BASELINE: <OUTLINED_FUNCTION_2>:
 ; CHECK-BASELINE-NEXT:	li	a0, 0x1
@@ -111,8 +113,9 @@ define i32 @_Z2f6v() minsize {
 ; CHECK-BASELINE-NEXT:	li	a2, 0x3
 ; CHECK-BASELINE-NEXT:	li	a3, 0x4
 ; CHECK-BASELINE-NEXT:	li	a4, 0x5
-; CHECK-BASELINE-NEXT:	li	a5, 0x8
-; CHECK-BASELINE-NEXT:	jr	t0
+; CHECK-BASELINE-NEXT:	li	a5, 0x7
+; CHECK-BASELINE-NEXT:	auipc	t1, 0x0
+; CHECK-BASELINE-NEXT:	jr	t1
 
 ; CHECK-LEAF-DESCENDANTS: <OUTLINED_FUNCTION_0>:
 ; CHECK-LEAF-DESCENDANTS-NEXT:	li	a0, 0x1
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
index 4ef3abd241577f..4a54a7289ddf27 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
@@ -11,7 +11,11 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    # FEntry call
 ; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB0_2:
+; CHECK-NEXT:    call	t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT:    call	foo
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
@@ -27,7 +31,11 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    # FEntry call
 ; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB1_2:
+; CHECK-NEXT:    call	t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT:    call	foo
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
@@ -47,7 +55,11 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    nop
 ; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB2_2:
+; CHECK-NEXT:    call	t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT:    call	foo
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
@@ -65,7 +77,11 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    nop
 ; CHECK:       # %bb.1:
-; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT:    call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT:    call foo
+; CHECK-LABEL: .LBB3_2:
+; CHECK-NEXT:    call	t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT:    call	foo
 entry:
   br i1 %a, label %if.then, label %if.end
 if.then:
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
index 715e212eecabb3..47ec447f61d09c 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
@@ -25,15 +25,14 @@ body:             |
     ; RV32I-MO-NEXT: {{  $}}
     ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV32I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
+    ;
     ; RV64I-MO-LABEL: name: func1
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
     ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV64I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     EH_LABEL <mcsymbol .Ltmp0>
     $x11 = ORI $x11, 1023
@@ -53,15 +52,14 @@ body:             |
     ; RV32I-MO-NEXT: {{  $}}
     ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV32I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
+    ;
     ; RV64I-MO-LABEL: name: func2
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
     ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV64I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     GC_LABEL <mcsymbol .Ltmp1>
     $x11 = ORI $x11, 1023
@@ -81,15 +79,14 @@ body:             |
     ; RV32I-MO-NEXT: {{  $}}
     ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV32I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
-    ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV32I-MO-NEXT: PseudoRET
+    ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
+    ;
     ; RV64I-MO-LABEL: name: func3
     ; RV64I-MO: liveins: $x10, $x11
     ; RV64I-MO-NEXT: {{  $}}
     ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
     ; RV64I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
-    ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
-    ; RV64I-MO-NEXT: PseudoRET
+    ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x10 = ORI $x10, 1023
     ANNOTATION_LABEL <mcsymbol .Ltmp2>
     $x11 = ORI $x11, 1023
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir b/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
new file mode 100644
index 00000000000000..b01cda582e19b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
@@ -0,0 +1,58 @@
+# Check that modifying X5 register is not a problem for machine outliner
+
+# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK,RV32I-MO %s
+# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK,RV64I-MO %s
+
+--- |
+  define i32 @outline_0(i32 %a, i32 %b) { ret i32 0 }
+
+  define i32 @outline_1(i32 %a, i32 %b) { ret i32 0 }
+
+  define i32 @outline_2(i32 %a, i32 %b) { ret i32 0 }
+
+...
+---
+name:            outline_0
+tracksRegLiveness: true
+isOutlined: false
+body:             |
+  bb.0:
+    liveins: $x10, $x11, $x5
+    ; RV32I-MO-LABEL: name: outline_0
+    ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+    ;
+    ; RV64I-MO-LABEL: name: outline_0
+    ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x10 = ADD $x10, $x5
+    $x11 = AND $x12, $x11
+    $x10 = SUB $x10, $x11
+    PseudoRET implicit $x10
+
+...
+---
+name:            outline_1
+tracksRegLiveness: true
+isOutlined: false
+body:             |
+  bb.0:
+    liveins: $x10, $x11, $x5
+    ; RV32I-MO-LABEL: name: outline_1
+    ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+    ;
+    ; RV64I-MO-LABEL: name: outline_1
+    ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x10 = ADD $x10, $x5
+    $x11 = AND $x12, $x11
+    $x10 = SUB $x10, $x11
+    PseudoRET implicit $x10
+
+...
+
+# CHECK-LABEL: name: OUTLINED_FUNCTION_0
+# CHECK: isOutlined: true
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner.mir b/llvm/test/CodeGen/RISCV/machineoutliner.mir
index 0221257354fcfa..ab12bfbe1fafc4 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner.mir
@@ -29,10 +29,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: outline_0
-    ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ;
     ; RV64I-MO-LABEL: name: outline_0
-    ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -48,10 +48,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: outline_1
-    ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ;
     ; RV64I-MO-LABEL: name: outline_1
-    ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -67,10 +67,10 @@ body:             |
   bb.0:
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: outline_2
-    ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ;
     ; RV64I-MO-LABEL: name: outline_2
-    ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -87,9 +87,11 @@ body:             |
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: dont_outline_0
     ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO-NOT: PseudoTAIL @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ;
     ; RV64I-MO-LABEL: name: dont_outline_0
     ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO-NOT: PseudoTAIL @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -106,9 +108,11 @@ body:             |
     liveins: $x10, $x11
     ; RV32I-MO-LABEL: name: dont_outline_1
     ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO-NOT: PseudoTAIL @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ;
     ; RV64I-MO-LABEL: name: dont_outline_1
     ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO-NOT: PseudoTAIL @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11
@@ -125,9 +129,11 @@ body:             |
     liveins: $x10, $x11, $x5
     ; RV32I-MO-LABEL: name: dont_outline_2
     ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV32I-MO-NOT: PseudoTAIL @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     ;
     ; RV64I-MO-LABEL: name: dont_outline_2
     ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0
+    ; RV64I-MO-NOT: PseudoTAIL @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
     $x11 = AND $x12, $x11

>From 74ca6ea375973bbbea17c0170d4afa69e0a73896 Mon Sep 17 00:00:00 2001
From: Goncharov Mark <mark.goncharov at syntacore.com>
Date: Fri, 8 Nov 2024 06:34:19 +0000
Subject: [PATCH 2/3] Fix misspelling

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 30 ++++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f6425d47fd81bb..88bab1e4d868dc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2938,7 +2938,7 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
   return MF.getFunction().hasMinSize();
 }
 
-static bool IsCandidatePatchable(const MachineInstr &MI) {
+static bool isCandidatePatchable(const MachineInstr &MI) {
   const MachineBasicBlock *MBB = MI.getParent();
   const MachineFunction *MF = MBB->getParent();
   const Function &F = MF->getFunction();
@@ -2946,13 +2946,13 @@ static bool IsCandidatePatchable(const MachineInstr &MI) {
          F.hasFnAttribute("patchable-function-entry");
 }
 
-static bool CannotInsertTailCall(const MachineInstr &MI) {
+static bool cannotInsertTailCall(const MachineInstr &MI) {
   if (MI.isTerminator())
-    return IsCandidatePatchable(MI);
+    return isCandidatePatchable(MI);
   return true;
 }
 
-static bool MIUseX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+static bool isMIUsesX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
   return MI.modifiesRegister(RISCV::X5, TRI) ||
          MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5);
 }
@@ -2963,18 +2963,18 @@ RISCVInstrInfo::getOutliningCandidateInfo(
     std::vector<outliner::Candidate> &RepeatedSequenceLocs,
     unsigned MinRepeats) const {
 
-  auto CandidateUseX5 = [](outliner::Candidate &C) {
+  auto CandidateUsesX5 = [](outliner::Candidate &C) {
     const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
     for (const MachineInstr &MI : C)
-      if (MIUseX5(MI, TRI))
+      if (isMIUsesX5(MI, TRI))
         return true;
     return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
   };
 
-  auto CannotInsertCall = [CandidateUseX5](outliner::Candidate &C) {
-    if (!CandidateUseX5(C))
+  auto CannotInsertCall = [CandidateUsesX5](outliner::Candidate &C) {
+    if (!CandidateUsesX5(C))
       return false;
-    if (!CannotInsertTailCall(C.back()))
+    if (!cannotInsertTailCall(C.back()))
       return false;
     return true;
   };
@@ -2990,12 +2990,12 @@ RISCVInstrInfo::getOutliningCandidateInfo(
   for (auto &MI : RepeatedSequenceLocs[0])
     SequenceSize += getInstSizeInBytes(MI);
 
-  if (!CannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
+  if (!cannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
     // tail function = 8 bytes. Can't be compressed
     for (auto &C : RepeatedSequenceLocs)
       C.setCallInfo(MachineOutlinerTailCall, 8);
 
-    // Using tail call we move ret instrunction from caller to calee.
+    // Using tail call we move ret instruction from caller to calle.
     //   So, FrameOverhead for this is 0
     return std::make_unique<outliner::OutlinedFunction>(
         RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall);
@@ -3037,7 +3037,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
     return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
                                      : outliner::InstrType::Invisible;
 
-  if (CannotInsertTailCall(MBB->back()) && MIUseX5(MI, TRI))
+  if (cannotInsertTailCall(MBB->back()) && isMIUsesX5(MI, TRI))
     return outliner::InstrType::Illegal;
 
   // Make sure the operands don't reference something unsafe.
@@ -3080,9 +3080,9 @@ void RISCVInstrInfo::buildOutlinedFrame(
 
   // Add in a return instruction to the end of the outlined frame.
   MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
-                            .addReg(RISCV::X0, RegState::Define)
-                            .addReg(RISCV::X5)
-                            .addImm(0));
+      .addReg(RISCV::X0, RegState::Define)
+      .addReg(RISCV::X5)
+      .addImm(0));
 }
 
 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(

>From 7c57f39fa31d30dcdf2bf84bfa7e8d30852a7c64 Mon Sep 17 00:00:00 2001
From: Goncharov Mark <mark.goncharov at syntacore.com>
Date: Thu, 14 Nov 2024 07:44:35 +0000
Subject: [PATCH 3/3] Fix linker relaxation problem

---
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |   6 +
 .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp |   6 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 141 +++++++++++-------
 .../test/CodeGen/RISCV/machineoutliner-x5.mir |  74 +++++++--
 4 files changed, 159 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 19103e219cb800..ca2f868cd4e764 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -208,6 +208,12 @@ static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
   return Desc.getNumOperands() - Offset;
 }
 
+static inline unsigned getTailExpandUseRegNo(const FeatureBitset &FeatureBits) {
+  // For Zicfilp, PseudoTAIL should be expanded to a software guarded branch.
+  // It means to use t2(x7) as rs1 of JALR to expand PseudoTAIL.
+  return FeatureBits[RISCV::FeatureStdExtZicfilp] ? RISCV::X7 : RISCV::X6;
+}
+
 static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
   const uint64_t TSFlags = Desc.TSFlags;
   assert(hasSEWOp(TSFlags));
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 54f1a3899c4957..3022b03f55daf5 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -125,11 +125,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
   MCRegister Ra;
   if (MI.getOpcode() == RISCV::PseudoTAIL) {
     Func = MI.getOperand(0);
-    Ra = RISCV::X6;
-    // For Zicfilp, PseudoTAIL should be expanded to a software guarded branch.
-    // It means to use t2(x7) as rs1 of JALR to expand PseudoTAIL.
-    if (STI.hasFeature(RISCV::FeatureStdExtZicfilp))
-      Ra = RISCV::X7;
+    Ra = RISCVII::getTailExpandUseRegNo(STI.getFeatureBits());
   } else if (MI.getOpcode() == RISCV::PseudoCALLReg) {
     Func = MI.getOperand(1);
     Ra = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 88bab1e4d868dc..ada56641e34332 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCVInstrInfo.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
 #include "MCTargetDesc/RISCVMatInt.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
@@ -2938,85 +2939,118 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
   return MF.getFunction().hasMinSize();
 }
 
-static bool isCandidatePatchable(const MachineInstr &MI) {
-  const MachineBasicBlock *MBB = MI.getParent();
-  const MachineFunction *MF = MBB->getParent();
+static bool isCandidatePatchable(const MachineBasicBlock &MBB) {
+  const MachineFunction *MF = MBB.getParent();
   const Function &F = MF->getFunction();
   return F.getFnAttribute("fentry-call").getValueAsBool() ||
          F.hasFnAttribute("patchable-function-entry");
 }
 
-static bool cannotInsertTailCall(const MachineInstr &MI) {
-  if (MI.isTerminator())
-    return isCandidatePatchable(MI);
-  return true;
+static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI,
+                         unsigned RegNo) {
+  return MI.readsRegister(RegNo, TRI) ||
+         MI.getDesc().hasImplicitUseOfPhysReg(RegNo);
 }
 
-static bool isMIUsesX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
-  return MI.modifiesRegister(RISCV::X5, TRI) ||
-         MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5);
+static bool isMIModifiesReg(const MachineInstr &MI,
+                            const TargetRegisterInfo *TRI, unsigned RegNo) {
+  return MI.modifiesRegister(RegNo, TRI) ||
+         MI.getDesc().hasImplicitDefOfPhysReg(RegNo);
 }
 
-std::optional<std::unique_ptr<outliner::OutlinedFunction>>
-RISCVInstrInfo::getOutliningCandidateInfo(
-    const MachineModuleInfo &MMI,
-    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
-    unsigned MinRepeats) const {
+static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
+  if (!MBB.back().isReturn())
+    return true;
+  if (isCandidatePatchable(MBB))
+    return true;
+
+  // PseudoTAIL is expanded through a scratch register (x6, or x7 with
+  // Zicfilp). If the block reads that register before redefining it,
+  // the tail call would clobber a live value, so we cannot insert one.
+  const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget();
+  unsigned TailExpandUseRegNo =
+      RISCVII::getTailExpandUseRegNo(STI.getFeatureBits());
+  for (const MachineInstr &MI : MBB) {
+    if (isMIReadsReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
+      return true;
+    if (isMIModifiesReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
+      break;
+  }
+  return false;
+}
+
+static std::optional<MachineOutlinerConstructionID>
+analyzeCandidate(outliner::Candidate &C) {
+  // If the last instruction is a return, we can rely on
+  // the verification already performed in getOutliningTypeImpl.
+  if (C.back().isReturn()) {
+    assert(!cannotInsertTailCall(*C.getMBB()) &&
+           "The candidate who uses return instruction must be outlined "
+           "using tail call");
+    return MachineOutlinerTailCall;
+  }
 
   auto CandidateUsesX5 = [](outliner::Candidate &C) {
     const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
-    for (const MachineInstr &MI : C)
-      if (isMIUsesX5(MI, TRI))
-        return true;
+    if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
+          return isMIModifiesReg(MI, TRI, RISCV::X5);
+        }))
+      return true;
     return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
   };
 
-  auto CannotInsertCall = [CandidateUsesX5](outliner::Candidate &C) {
-    if (!CandidateUsesX5(C))
-      return false;
-    if (!cannotInsertTailCall(C.back()))
-      return false;
-    return true;
-  };
+  if (!CandidateUsesX5(C))
+    return MachineOutlinerDefault;
+
+  return std::nullopt;
+}
+
+std::optional<std::unique_ptr<outliner::OutlinedFunction>>
+RISCVInstrInfo::getOutliningCandidateInfo(
+    const MachineModuleInfo &MMI,
+    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+    unsigned MinRepeats) const {
 
-  llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
+  // All candidates share the same instruction sequence; analyze the first.
+  outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
+  auto CandidateInfo = analyzeCandidate(Candidate);
+  if (!CandidateInfo)
+    RepeatedSequenceLocs.clear();
 
   // If the sequence doesn't have enough candidates left, then we're done.
   if (RepeatedSequenceLocs.size() < MinRepeats)
     return std::nullopt;
 
-  unsigned SequenceSize = 0;
-
-  for (auto &MI : RepeatedSequenceLocs[0])
-    SequenceSize += getInstSizeInBytes(MI);
-
-  if (!cannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
-    // tail function = 8 bytes. Can't be compressed
-    for (auto &C : RepeatedSequenceLocs)
-      C.setCallInfo(MachineOutlinerTailCall, 8);
-
-    // Using tail call we move ret instruction from caller to calle.
-    //   So, FrameOverhead for this is 0
-    return std::make_unique<outliner::OutlinedFunction>(
-        RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall);
+  unsigned InstrSizeCExt =
+      Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtCOrZca() ? 2
+                                                                          : 4;
+  unsigned CallOverhead = 0, FrameOverhead = 0;
+
+  MachineOutlinerConstructionID MOCI = CandidateInfo.value();
+  switch (MOCI) {
+  case MachineOutlinerDefault:
+    // call t0, function = 8 bytes.
+    CallOverhead = 8;
+    // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
+    FrameOverhead = InstrSizeCExt;
+    break;
+  case MachineOutlinerTailCall:
+    // tail call = auipc + jalr in the worst case without linker relaxation.
+    CallOverhead = 4 + InstrSizeCExt;
+    // Using tail call we move ret instruction from caller to callee.
+    FrameOverhead = 0;
+    break;
   }
 
-  // call t0, function = 8 bytes.
-  unsigned CallOverhead = 8;
   for (auto &C : RepeatedSequenceLocs)
-    C.setCallInfo(MachineOutlinerDefault, CallOverhead);
+    C.setCallInfo(MOCI, CallOverhead);
 
-  // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
-  unsigned FrameOverhead = 4;
-  if (RepeatedSequenceLocs[0]
-          .getMF()
-          ->getSubtarget<RISCVSubtarget>()
-          .hasStdExtCOrZca())
-    FrameOverhead = 2;
+  unsigned SequenceSize = 0;
+  for (auto &MI : Candidate)
+    SequenceSize += getInstSizeInBytes(MI);
 
   return std::make_unique<outliner::OutlinedFunction>(
-      RepeatedSequenceLocs, SequenceSize, FrameOverhead,
-      MachineOutlinerDefault);
+      RepeatedSequenceLocs, SequenceSize, FrameOverhead, MOCI);
 }
 
 outliner::InstrType
@@ -3037,7 +3071,8 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
     return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
                                      : outliner::InstrType::Invisible;
 
-  if (cannotInsertTailCall(MBB->back()) && isMIUsesX5(MI, TRI))
+  if (cannotInsertTailCall(*MBB) &&
+      (MI.isReturn() || isMIModifiesReg(MI, TRI, RISCV::X5)))
     return outliner::InstrType::Illegal;
 
   // Make sure the operands don't reference something unsafe.
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir b/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
index b01cda582e19b0..2c9af620021186 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
@@ -6,24 +6,26 @@
 # RUN: | FileCheck -check-prefixes=CHECK,RV64I-MO %s
 
 --- |
-  define i32 @outline_0(i32 %a, i32 %b) { ret i32 0 }
+  define i32 @outline_tail_1(i32 %a, i32 %b) { ret i32 0 }
 
-  define i32 @outline_1(i32 %a, i32 %b) { ret i32 0 }
+  define i32 @outline_tail_2(i32 %a, i32 %b) { ret i32 0 }
 
-  define i32 @outline_2(i32 %a, i32 %b) { ret i32 0 }
+  define i32 @outline_call_1(i32 %a, i32 %b) { ret i32 0 }
+
+  define i32 @outline_call_2(i32 %a, i32 %b) { ret i32 0 }
 
 ...
 ---
-name:            outline_0
+name:            outline_tail_1
 tracksRegLiveness: true
 isOutlined: false
 body:             |
   bb.0:
     liveins: $x10, $x11, $x5
-    ; RV32I-MO-LABEL: name: outline_0
+    ; RV32I-MO-LABEL: name: outline_tail_1
     ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
     ;
-    ; RV64I-MO-LABEL: name: outline_0
+    ; RV64I-MO-LABEL: name: outline_tail_1
     ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
@@ -31,19 +33,18 @@ body:             |
     $x11 = AND $x12, $x11
     $x10 = SUB $x10, $x11
     PseudoRET implicit $x10
-
 ...
 ---
-name:            outline_1
+name:            outline_tail_2
 tracksRegLiveness: true
 isOutlined: false
 body:             |
   bb.0:
     liveins: $x10, $x11, $x5
-    ; RV32I-MO-LABEL: name: outline_1
+    ; RV32I-MO-LABEL: name: outline_tail_2
     ; RV32I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
     ;
-    ; RV64I-MO-LABEL: name: outline_1
+    ; RV64I-MO-LABEL: name: outline_tail_2
     ; RV64I-MO:         PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
     $x11 = ORI $x11, 1023
     $x12 = ADDI $x10, 17
@@ -51,6 +52,59 @@ body:             |
     $x11 = AND $x12, $x11
     $x10 = SUB $x10, $x11
     PseudoRET implicit $x10
+...
+
+...
+---
+name:            outline_call_1
+tracksRegLiveness: true
+isOutlined: false
+body:             |
+  bb.0:
+    liveins: $x10, $x11, $x5, $x6
+    ; RV32I-MO-LABEL: name: outline_call_1
+    ; RV32I-MO:         $x5 = ADD $x5, $x11
+    ; RV32I-MO-NEXT:    $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x6, implicit $x10, implicit $x11
+    ; RV32I-MO-NEXT:    PseudoRET implicit $x10
+    ;
+    ; RV64I-MO-LABEL: name: outline_call_1
+    ; RV64I-MO:         $x5 = ADD $x5, $x11
+    ; RV64I-MO-NEXT:    $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x6, implicit $x10, implicit $x11
+    ; RV64I-MO-NEXT:    PseudoRET implicit $x10
+    $x5 = ADD $x5, $x11
+    $x10 = SUB $x10, $x6
+    $x11 = AND $x6, $x11
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x12 = ADDI $x10, 17
+    $x12 = ADDI $x10, 17
+    PseudoRET implicit $x10
+
+...
+---
+name:            outline_call_2
+tracksRegLiveness: true
+isOutlined: false
+body:             |
+  bb.0:
+    liveins: $x10, $x11, $x5, $x6
+    ; RV32I-MO-LABEL: name: outline_call_2
+    ; RV32I-MO:         $x5 = ADD $x5, $x11
+    ; RV32I-MO-NEXT:    $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x6, implicit $x10, implicit $x11
+    ; RV32I-MO-NEXT:    PseudoRET implicit $x10
+    ;
+    ; RV64I-MO-LABEL: name: outline_call_2
+    ; RV64I-MO:         $x5 = ADD $x5, $x11
+    ; RV64I-MO-NEXT:    $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x6, implicit $x10, implicit $x11
+    ; RV64I-MO-NEXT:    PseudoRET implicit $x10
+    $x5 = ADD $x5, $x11
+    $x10 = SUB $x10, $x6
+    $x11 = AND $x6, $x11
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x12 = ADDI $x10, 17
+    $x12 = ADDI $x10, 17
+    PseudoRET implicit $x10
 
 ...
 



More information about the llvm-commits mailing list