[llvm] [LoongArch] Reimplement to prevent Pseudo{CALL, LA*}_LARGE instruction reordering (PR #100099)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 03:48:38 PDT 2024


https://github.com/heiher created https://github.com/llvm/llvm-project/pull/100099

The instruction sequences that the Pseudo{CALL, LA*}_LARGE instructions expand to, as specified in psABI v2.30, must not be reordered. This patch sets scheduling boundaries for these instructions to prevent reordering. The expansion of the Pseudo{CALL, LA*}_LARGE instructions is moved back to the Pre-RA expansion pass, which will help with subsequent address calculation optimizations.

>From f6d724e8a9bf137f4c58a5d43854024efb13cfc0 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Mon, 22 Jul 2024 22:58:01 +0800
Subject: [PATCH] [LoongArch] Reimplement to prevent Pseudo{CALL, LA*}_LARGE
 instruction reordering

The Pseudo{CALL, LA*}_LARGE instruction patterns specified in psABI v2.30 cannot
be reordered. This patch sets scheduling boundaries for these instructions to
prevent reordering. The Pseudo{CALL, LA*}_LARGE instruction is moved back to
Pre-RA expansion, which will help with subsequent address calculation optimizations.
---
 .../LoongArch/LoongArchExpandPseudoInsts.cpp  | 645 +++++++++---------
 .../Target/LoongArch/LoongArchInstrInfo.cpp   |  94 +++
 .../lib/Target/LoongArch/LoongArchInstrInfo.h |   4 +
 llvm/test/CodeGen/LoongArch/code-models.ll    |  36 +-
 llvm/test/CodeGen/LoongArch/expand-call.ll    |   2 +-
 llvm/test/CodeGen/LoongArch/global-address.ll |  32 +-
 .../LoongArch/global-variable-code-model.ll   |   8 +-
 .../LoongArch/machinelicm-address-pseudos.ll  |  66 +-
 .../LoongArch/psabi-restricted-scheduling.ll  | 148 ++--
 llvm/test/CodeGen/LoongArch/tls-models.ll     |  98 ++-
 10 files changed, 601 insertions(+), 532 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index c136f5b3e515d..33b93e42bb5c4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -62,27 +62,47 @@ class LoongArchPreRAExpandPseudo : public MachineFunctionPass {
                                MachineBasicBlock::iterator &NextMBBI,
                                unsigned FlagsHi, unsigned SecondOpcode,
                                unsigned FlagsLo);
+  bool expandLargeAddressLoad(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI,
+                              unsigned LastOpcode, unsigned IdentifyingMO);
+  bool expandLargeAddressLoad(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI,
+                              unsigned LastOpcode, unsigned IdentifyingMO,
+                              const MachineOperand &Symbol, Register DestReg,
+                              bool EraseFromParent);
   bool expandLoadAddressPcrel(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressGot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
-                            MachineBasicBlock::iterator &NextMBBI);
+                            MachineBasicBlock::iterator &NextMBBI,
+                            bool Large = false);
   bool expandLoadAddressTLSLE(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineBasicBlock::iterator &NextMBBI);
   bool expandLoadAddressTLSIE(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressTLSLD(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressTLSGD(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressTLSDesc(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
-                                MachineBasicBlock::iterator &NextMBBI);
+                                MachineBasicBlock::iterator &NextMBBI,
+                                bool Large = false);
+  bool expandFunctionCALL(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          MachineBasicBlock::iterator &NextMBBI,
+                          bool IsTailCall);
 };
 
 char LoongArchPreRAExpandPseudo::ID = 0;
@@ -115,18 +135,38 @@ bool LoongArchPreRAExpandPseudo::expandMI(
   switch (MBBI->getOpcode()) {
   case LoongArch::PseudoLA_PCREL:
     return expandLoadAddressPcrel(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_PCREL_LARGE:
+    return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_GOT:
     return expandLoadAddressGot(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_GOT_LARGE:
+    return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_LE:
     return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI);
   case LoongArch::PseudoLA_TLS_IE:
     return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_IE_LARGE:
+    return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_LD:
     return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_LD_LARGE:
+    return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_GD:
     return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_GD_LARGE:
+    return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_DESC_PC:
     return expandLoadAddressTLSDesc(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_DESC_PC_LARGE:
+    return expandLoadAddressTLSDesc(MBB, MBBI, NextMBBI, /*Large=*/true);
+  case LoongArch::PseudoCALL:
+  case LoongArch::PseudoCALL_MEDIUM:
+  case LoongArch::PseudoCALL_LARGE:
+    return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false);
+  case LoongArch::PseudoTAIL:
+  case LoongArch::PseudoTAIL_MEDIUM:
+  case LoongArch::PseudoTAIL_LARGE:
+    return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true);
   }
   return false;
 }
@@ -159,9 +199,118 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair(
   return true;
 }
 
+bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
+    unsigned IdentifyingMO) {
+  MachineInstr &MI = *MBBI;
+  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO,
+                                MI.getOperand(2), MI.getOperand(0).getReg(),
+                                true);
+}
+
+bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
+    unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg,
+    bool EraseFromParent) {
+  // Code Sequence:
+  //
+  // Part1: pcalau12i  $scratch, %MO1(sym)
+  // Part0: addi.d     $dest, $zero, %MO0(sym)
+  // Part2: lu32i.d    $dest, %MO2(sym)
+  // Part3: lu52i.d    $dest, $dest, %MO3(sym)
+  // Fin:   LastOpcode $dest, $dest, $scratch
+
+  unsigned MO0, MO1, MO2, MO3;
+  switch (IdentifyingMO) {
+  default:
+    llvm_unreachable("unsupported identifying MO");
+  case LoongArchII::MO_PCREL_LO:
+    MO0 = IdentifyingMO;
+    MO1 = LoongArchII::MO_PCREL_HI;
+    MO2 = LoongArchII::MO_PCREL64_LO;
+    MO3 = LoongArchII::MO_PCREL64_HI;
+    break;
+  case LoongArchII::MO_GOT_PC_HI:
+  case LoongArchII::MO_LD_PC_HI:
+  case LoongArchII::MO_GD_PC_HI:
+    // These cases relocate just like the GOT case, except for Part1.
+    MO0 = LoongArchII::MO_GOT_PC_LO;
+    MO1 = IdentifyingMO;
+    MO2 = LoongArchII::MO_GOT_PC64_LO;
+    MO3 = LoongArchII::MO_GOT_PC64_HI;
+    break;
+  case LoongArchII::MO_IE_PC_LO:
+    MO0 = IdentifyingMO;
+    MO1 = LoongArchII::MO_IE_PC_HI;
+    MO2 = LoongArchII::MO_IE_PC64_LO;
+    MO3 = LoongArchII::MO_IE_PC64_HI;
+    break;
+  }
+
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  assert(MF->getSubtarget<LoongArchSubtarget>().is64Bit() &&
+         "Large code model requires LA64");
+
+  Register TmpPart1 =
+      MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+  Register TmpPart0 =
+      DestReg.isVirtual()
+          ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+          : DestReg;
+  Register TmpParts02 =
+      DestReg.isVirtual()
+          ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+          : DestReg;
+  Register TmpParts023 =
+      DestReg.isVirtual()
+          ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+          : DestReg;
+
+  auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1);
+  auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0)
+                   .addReg(LoongArch::R0);
+  auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02)
+                   // "rj" is needed due to InstrInfo pattern requirement.
+                   .addReg(TmpPart0, RegState::Kill);
+  auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023)
+                   .addReg(TmpParts02, RegState::Kill);
+  BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg)
+      .addReg(TmpParts023)
+      .addReg(TmpPart1, RegState::Kill);
+
+  if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) {
+    const char *SymName = Symbol.getSymbolName();
+    Part0.addExternalSymbol(SymName, MO0);
+    Part1.addExternalSymbol(SymName, MO1);
+    Part2.addExternalSymbol(SymName, MO2);
+    Part3.addExternalSymbol(SymName, MO3);
+  } else {
+    Part0.addDisp(Symbol, 0, MO0);
+    Part1.addDisp(Symbol, 0, MO1);
+    Part2.addDisp(Symbol, 0, MO2);
+    Part3.addDisp(Symbol, 0, MO3);
+  }
+
+  if (EraseFromParent)
+    MI.eraseFromParent();
+
+  return true;
+}
+
 bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%pc` family of
+    // relocs.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+                                  LoongArchII::MO_PCREL_LO);
+
   // Code Sequence:
   // pcalau12i $rd, %pc_hi20(sym)
   // addi.w/d $rd, $rd, %pc_lo12(sym)
@@ -174,7 +323,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressGot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%got_pc` family
+    // of relocs, loading the result from GOT with `ldx.d` in the end.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
+                                  LoongArchII::MO_GOT_PC_HI);
+
   // Code Sequence:
   // pcalau12i $rd, %got_pc_hi20(sym)
   // ld.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -235,7 +390,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%ie_pc` family
+    // of relocs, loading the result with `ldx.d` in the end.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
+                                  LoongArchII::MO_IE_PC_LO);
+
   // Code Sequence:
   // pcalau12i $rd, %ie_pc_hi20(sym)
   // ld.w/d $rd, $rd, %ie_pc_lo12(sym)
@@ -248,7 +409,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%got_pc` family
+    // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+                                  LoongArchII::MO_LD_PC_HI);
+
   // Code Sequence:
   // pcalau12i $rd, %ld_pc_hi20(sym)
   // addi.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -261,7 +428,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%got_pc` family
+    // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+                                  LoongArchII::MO_GD_PC_HI);
+
   // Code Sequence:
   // pcalau12i $rd, %gd_pc_hi20(sym)
   // addi.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -274,13 +447,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Code Sequence:
-  // pcalau12i $a0, %desc_pc_hi20(sym)
-  // addi.w/d  $a0, $a0, %desc_pc_lo12(sym)
-  // ld.w/d    $ra, $a0, %desc_ld(sym)
-  // jirl      $ra, $ra, %desc_ld(sym)
-  // add.d     $dst, $a0, $tp
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
   MachineFunction *MF = MBB.getParent();
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
@@ -291,25 +458,62 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
   unsigned LD = STI.is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
 
   Register DestReg = MI.getOperand(0).getReg();
-  Register ScratchReg =
+  Register Tmp1Reg =
       MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
-  MachineOperand &Symbol = MI.getOperand(1);
+  MachineOperand &Symbol = MI.getOperand(Large ? 2 : 1);
 
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg)
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), Tmp1Reg)
       .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_HI);
 
-  BuildMI(MBB, MBBI, DL, TII->get(ADDI), LoongArch::R4)
-      .addReg(ScratchReg)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
+  if (Large) {
+    // Code Sequence:
+    //
+    // pcalau12i  $a0, %desc_pc_hi20(sym)
+    // addi.d     $a1, $zero, %desc_pc_lo12(sym)
+    // lu32i.d    $a1, %desc64_pc_lo20(sym)
+    // lu52i.d    $a1, $a1, %desc64_pc_hi12(sym)
+    // add.d      $a0, $a0, $a1
+    // ld.d       $ra, $a0, %desc_ld(sym)
+    // jirl       $ra, $ra, %desc_call(sym)
+    // add.d      $dst, $a0, $tp
+    assert(MBB.getParent()->getSubtarget<LoongArchSubtarget>().is64Bit() &&
+           "Large code model requires LA64");
+    Register Tmp2Reg =
+        MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+    Register Tmp3Reg =
+        MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+    Register Tmp4Reg =
+        MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+    BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), Tmp2Reg)
+        .addReg(LoongArch::R0)
+        .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
+    BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), Tmp3Reg)
+        .addReg(Tmp2Reg, RegState::Kill)
+        .addDisp(Symbol, 0, LoongArchII::MO_DESC64_PC_LO);
+    BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), Tmp4Reg)
+        .addReg(Tmp3Reg)
+        .addDisp(Symbol, 0, LoongArchII::MO_DESC64_PC_HI);
+    BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADD_D), LoongArch::R4)
+        .addReg(Tmp1Reg)
+        .addReg(Tmp4Reg);
+  } else {
+    // Code Sequence:
+    // pcalau12i $a0, %desc_pc_hi20(sym)
+    // addi.w/d  $a0, $a0, %desc_pc_lo12(sym)
+    // ld.w/d    $ra, $a0, %desc_ld(sym)
+    // jirl      $ra, $ra, %desc_call(sym)
+    // add.d     $dst, $a0, $tp
+    BuildMI(MBB, MBBI, DL, TII->get(ADDI), LoongArch::R4)
+        .addReg(Tmp1Reg)
+        .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
+  }
 
   BuildMI(MBB, MBBI, DL, TII->get(LD), LoongArch::R1)
       .addReg(LoongArch::R4)
       .addDisp(Symbol, 0, LoongArchII::MO_DESC_LD);
-
   BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoDESC_CALL), LoongArch::R1)
       .addReg(LoongArch::R1)
       .addDisp(Symbol, 0, LoongArchII::MO_DESC_CALL);
-
   BuildMI(MBB, MBBI, DL, TII->get(ADD), DestReg)
       .addReg(LoongArch::R4)
       .addReg(LoongArch::R2);
@@ -318,6 +522,85 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
   return true;
 }
 
+bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) {
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  const MachineOperand &Func = MI.getOperand(0);
+  MachineInstrBuilder CALL;
+  unsigned Opcode;
+
+  switch (MF->getTarget().getCodeModel()) {
+  default:
+    report_fatal_error("Unsupported code model");
+    break;
+  case CodeModel::Small: {
+    // CALL:
+    // bl func
+    // TAIL:
+    // b func
+    Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL;
+    CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func);
+    break;
+  }
+  case CodeModel::Medium: {
+    // CALL:
+    // pcaddu18i $ra, %call36(func)
+    // jirl      $ra, $ra, 0
+    // TAIL:
+    // pcaddu18i $scratch, %call36(func)
+    // jirl      $r0, $scratch, 0
+    Opcode =
+        IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
+    Register ScratchReg =
+        IsTailCall
+            ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+            : LoongArch::R1;
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg);
+
+    CALL =
+        BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0);
+
+    if (Func.isSymbol())
+      MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36);
+    else
+      MIB.addDisp(Func, 0, LoongArchII::MO_CALL36);
+    break;
+  }
+  case CodeModel::Large: {
+    // Emit the 5-insn large address load sequence, either directly or
+    // indirectly in case of going through the GOT, then JIRL_TAIL or
+    // JIRL_CALL to $addr.
+    Opcode =
+        IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
+    Register AddrReg =
+        IsTailCall
+            ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+            : LoongArch::R1;
+
+    bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal();
+    unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO;
+    unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D;
+    expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg,
+                           false);
+    CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0);
+    break;
+  }
+  }
+
+  // Transfer implicit operands.
+  CALL.copyImplicitOps(MI);
+
+  // Transfer MI flags.
+  CALL.setMIFlags(MI.getFlags());
+
+  MI.eraseFromParent();
+  return true;
+}
+
 class LoongArchExpandPseudo : public MachineFunctionPass {
 public:
   const LoongArchInstrInfo *TII;
@@ -339,38 +622,6 @@ class LoongArchExpandPseudo : public MachineFunctionPass {
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      MachineBasicBlock::iterator &NextMBBI);
-  bool expandLargeAddressLoad(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI,
-                              unsigned LastOpcode, unsigned IdentifyingMO);
-  bool expandLargeAddressLoad(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI,
-                              unsigned LastOpcode, unsigned IdentifyingMO,
-                              const MachineOperand &Symbol, Register DestReg,
-                              bool EraseFromParent);
-  bool expandLoadAddressPcrelLarge(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   MachineBasicBlock::iterator &NextMBBI);
-  bool expandLoadAddressGotLarge(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator MBBI,
-                                 MachineBasicBlock::iterator &NextMBBI);
-  bool expandLoadAddressTLSIELarge(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   MachineBasicBlock::iterator &NextMBBI);
-  bool expandLoadAddressTLSLDLarge(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   MachineBasicBlock::iterator &NextMBBI);
-  bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   MachineBasicBlock::iterator &NextMBBI);
-  bool expandLoadAddressTLSDescPcLarge(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator MBBI,
-                                       MachineBasicBlock::iterator &NextMBBI);
-  bool expandFunctionCALL(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MBBI,
-                          MachineBasicBlock::iterator &NextMBBI,
-                          bool IsTailCall);
 };
 
 char LoongArchExpandPseudo::ID = 0;
@@ -405,26 +656,6 @@ bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
   switch (MBBI->getOpcode()) {
   case LoongArch::PseudoCopyCFR:
     return expandCopyCFR(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoLA_PCREL_LARGE:
-    return expandLoadAddressPcrelLarge(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoLA_GOT_LARGE:
-    return expandLoadAddressGotLarge(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoLA_TLS_IE_LARGE:
-    return expandLoadAddressTLSIELarge(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoLA_TLS_LD_LARGE:
-    return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoLA_TLS_GD_LARGE:
-    return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoLA_TLS_DESC_PC_LARGE:
-    return expandLoadAddressTLSDescPcLarge(MBB, MBBI, NextMBBI);
-  case LoongArch::PseudoCALL:
-  case LoongArch::PseudoCALL_MEDIUM:
-  case LoongArch::PseudoCALL_LARGE:
-    return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false);
-  case LoongArch::PseudoTAIL:
-  case LoongArch::PseudoTAIL_MEDIUM:
-  case LoongArch::PseudoTAIL_LARGE:
-    return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true);
   }
 
   return false;
@@ -483,264 +714,6 @@ bool LoongArchExpandPseudo::expandCopyCFR(
   return true;
 }
 
-bool LoongArchExpandPseudo::expandLargeAddressLoad(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
-    unsigned IdentifyingMO) {
-  MachineInstr &MI = *MBBI;
-  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO,
-                                MI.getOperand(2), MI.getOperand(0).getReg(),
-                                true);
-}
-
-bool LoongArchExpandPseudo::expandLargeAddressLoad(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
-    unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg,
-    bool EraseFromParent) {
-  // Code Sequence:
-  //
-  // Part1: pcalau12i  $dst, %MO1(sym)
-  // Part0: addi.d     $t8, $zero, %MO0(sym)
-  // Part2: lu32i.d    $t8, %MO2(sym)
-  // Part3: lu52i.d    $t8, $t8, %MO3(sym)
-  // Fin:   LastOpcode $dst, $t8, $dst
-
-  unsigned MO0, MO1, MO2, MO3;
-  switch (IdentifyingMO) {
-  default:
-    llvm_unreachable("unsupported identifying MO");
-  case LoongArchII::MO_PCREL_LO:
-    MO0 = IdentifyingMO;
-    MO1 = LoongArchII::MO_PCREL_HI;
-    MO2 = LoongArchII::MO_PCREL64_LO;
-    MO3 = LoongArchII::MO_PCREL64_HI;
-    break;
-  case LoongArchII::MO_GOT_PC_HI:
-  case LoongArchII::MO_LD_PC_HI:
-  case LoongArchII::MO_GD_PC_HI:
-    // These cases relocate just like the GOT case, except for Part1.
-    MO0 = LoongArchII::MO_GOT_PC_LO;
-    MO1 = IdentifyingMO;
-    MO2 = LoongArchII::MO_GOT_PC64_LO;
-    MO3 = LoongArchII::MO_GOT_PC64_HI;
-    break;
-  case LoongArchII::MO_IE_PC_LO:
-    MO0 = IdentifyingMO;
-    MO1 = LoongArchII::MO_IE_PC_HI;
-    MO2 = LoongArchII::MO_IE_PC64_LO;
-    MO3 = LoongArchII::MO_IE_PC64_HI;
-    break;
-  }
-
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  Register ScratchReg = LoongArch::R20; // $t8
-
-  assert(MBB.getParent()->getSubtarget<LoongArchSubtarget>().is64Bit() &&
-         "Large code model requires LA64");
-
-  auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg);
-  auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg)
-                   .addReg(LoongArch::R0);
-  auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg)
-                   // "rj" is needed due to InstrInfo pattern requirement.
-                   .addReg(ScratchReg);
-  auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg)
-                   .addReg(ScratchReg);
-  BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg)
-      .addReg(ScratchReg)
-      .addReg(DestReg);
-
-  if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) {
-    const char *SymName = Symbol.getSymbolName();
-    Part0.addExternalSymbol(SymName, MO0);
-    Part1.addExternalSymbol(SymName, MO1);
-    Part2.addExternalSymbol(SymName, MO2);
-    Part3.addExternalSymbol(SymName, MO3);
-  } else {
-    Part0.addDisp(Symbol, 0, MO0);
-    Part1.addDisp(Symbol, 0, MO1);
-    Part2.addDisp(Symbol, 0, MO2);
-    Part3.addDisp(Symbol, 0, MO3);
-  }
-
-  if (EraseFromParent)
-    MI.eraseFromParent();
-
-  return true;
-}
-
-bool LoongArchExpandPseudo::expandLoadAddressPcrelLarge(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Emit the 5-insn large address load sequence with the `%pc` family of
-  // relocs.
-  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
-                                LoongArchII::MO_PCREL_LO);
-}
-
-bool LoongArchExpandPseudo::expandLoadAddressGotLarge(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Emit the 5-insn large address load sequence with the `%got_pc` family
-  // of relocs, loading the result from GOT with `ldx.d` in the end.
-  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
-                                LoongArchII::MO_GOT_PC_HI);
-}
-
-bool LoongArchExpandPseudo::expandLoadAddressTLSIELarge(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Emit the 5-insn large address load sequence with the `%ie_pc` family
-  // of relocs, loading the result with `ldx.d` in the end.
-  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
-                                LoongArchII::MO_IE_PC_LO);
-}
-
-bool LoongArchExpandPseudo::expandLoadAddressTLSLDLarge(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Emit the 5-insn large address load sequence with the `%got_pc` family
-  // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`.
-  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
-                                LoongArchII::MO_LD_PC_HI);
-}
-
-bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Emit the 5-insn large address load sequence with the `%got_pc` family
-  // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`.
-  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
-                                LoongArchII::MO_GD_PC_HI);
-}
-
-bool LoongArchExpandPseudo::expandLoadAddressTLSDescPcLarge(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
-  // Code Sequence:
-  //
-  // pcalau12i  $a0, %desc_pc_hi20(sym)
-  // addi.d     $t8, $zero, %desc_pc_lo12(sym)
-  // lu32i.d    $t8, %desc64_pc_lo20(sym)
-  // lu52i.d    $t8, $t8, %desc64_pc_hi12(sym)
-  // add.d      $a0, $a0, $t8
-  // ld.d       $ra, $a0, %desc_ld(sym)
-  // jirl       $ra, $ra, %desc_call(sym)
-  // add.d      $dst, $a0, $tp
-
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  Register DestReg = MI.getOperand(0).getReg();
-  MachineOperand &Symbol = MI.getOperand(2);
-  Register ScratchReg = LoongArch::R20; // $t8
-
-  assert(MBB.getParent()->getSubtarget<LoongArchSubtarget>().is64Bit() &&
-         "Large code model requires LA64");
-
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), LoongArch::R4)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_HI);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg)
-      .addReg(LoongArch::R0)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg)
-      .addReg(ScratchReg)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC64_PC_LO);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg)
-      .addReg(ScratchReg)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC64_PC_HI);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADD_D), LoongArch::R4)
-      .addReg(ScratchReg)
-      .addReg(LoongArch::R4);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LD_D), LoongArch::R1)
-      .addReg(LoongArch::R4)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC_LD);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoDESC_CALL), LoongArch::R1)
-      .addReg(LoongArch::R1)
-      .addDisp(Symbol, 0, LoongArchII::MO_DESC_CALL);
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADD_D), DestReg)
-      .addReg(LoongArch::R4)
-      .addReg(LoongArch::R2);
-
-  MI.eraseFromParent();
-
-  return true;
-}
-
-bool LoongArchExpandPseudo::expandFunctionCALL(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) {
-  MachineFunction *MF = MBB.getParent();
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  const MachineOperand &Func = MI.getOperand(0);
-  MachineInstrBuilder CALL;
-  unsigned Opcode;
-
-  switch (MF->getTarget().getCodeModel()) {
-  default:
-    report_fatal_error("Unsupported code model");
-    break;
-  case CodeModel::Small: {
-    // CALL:
-    // bl func
-    // TAIL:
-    // b func
-    Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL;
-    CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func);
-    break;
-  }
-  case CodeModel::Medium: {
-    // CALL:
-    // pcaddu18i  $ra, %call36(func)
-    // jirl       $ra, $ra, 0
-    // TAIL:
-    // pcaddu18i  $t8, %call36(func)
-    // jr         $t8
-    Opcode =
-        IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
-    Register ScratchReg = IsTailCall ? LoongArch::R20 : LoongArch::R1;
-    MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg);
-
-    CALL =
-        BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0);
-
-    if (Func.isSymbol())
-      MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36);
-    else
-      MIB.addDisp(Func, 0, LoongArchII::MO_CALL36);
-    break;
-  }
-  case CodeModel::Large: {
-    // Emit the 5-insn large address load sequence, either directly or
-    // indirectly in case of going through the GOT, then JIRL_TAIL or
-    // JIRL_CALL to $addr.
-    Opcode =
-        IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
-    Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1;
-
-    bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal();
-    unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO;
-    unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D;
-    expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg,
-                           false);
-    CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0);
-    break;
-  }
-  }
-
-  // Transfer implicit operands.
-  CALL.copyImplicitOps(MI);
-
-  // Transfer MI flags.
-  CALL.setMIFlags(MI.getFlags());
-
-  MI.eraseFromParent();
-  return true;
-}
-
 } // end namespace
 
 INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index a85b054a85d72..47db69607f051 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -347,6 +347,100 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
   }
 }
 
+bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+                                              const MachineBasicBlock *MBB,
+                                              const MachineFunction &MF) const {
+  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
+    return true;
+
+  auto MII = MI.getIterator();
+  auto MIE = MBB->end();
+
+  // According to psABI v2.30:
+  //
+  // https://github.com/loongson/la-abi-specs/releases/tag/v2.30
+  //
+  // The following instruction patterns are prohibited from being reordered:
+  //
+  // * pcaddu18i $ra, %call36(s)
+  //   jirl      $ra, $ra, 0
+  //
+  // * pcalau12i $a0, %pc_hi20(s)
+  //   addi.d $a1, $zero, %pc_lo12(s)
+  //   lu32i.d $a1, %pc64_lo20(s)
+  //   lu52i.d $a1, $a1, %pc64_hi12(s)
+  //
+  // * pcalau12i $a0, %got_pc_hi20(s) | %ld_pc_hi20(s) | %gd_pc_hi20(s)
+  //   addi.d $a1, $zero, %got_pc_lo12(s)
+  //   lu32i.d $a1, %got64_pc_lo20(s)
+  //   lu52i.d $a1, $a1, %got64_pc_hi12(s)
+  //
+  // * pcalau12i $a0, %ie_pc_hi20(s)
+  //   addi.d $a1, $zero, %ie_pc_lo12(s)
+  //   lu32i.d $a1, %ie64_pc_lo20(s)
+  //   lu52i.d $a1, $a1, %ie64_pc_hi12(s)
+  switch (MI.getOpcode()) {
+  case LoongArch::PCADDU18I:
+    if (MI.getOperand(1).getTargetFlags() == LoongArchII::MO_CALL36)
+      return true;
+    break;
+  case LoongArch::PCALAU12I: {
+    auto AddI = std::next(MII);
+    if (AddI == MIE || AddI->getOpcode() != LoongArch::ADDI_D)
+      break;
+    auto Lu32I = std::next(AddI);
+    if (Lu32I == MIE || Lu32I->getOpcode() != LoongArch::LU32I_D)
+      break;
+    auto MO0 = MI.getOperand(1).getTargetFlags();
+    auto MO1 = AddI->getOperand(2).getTargetFlags();
+    auto MO2 = Lu32I->getOperand(2).getTargetFlags();
+    if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO &&
+        MO2 == LoongArchII::MO_PCREL64_LO)
+      return true;
+    if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI ||
+         MO0 == LoongArchII::MO_GD_PC_HI) &&
+        MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO)
+      return true;
+    if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO &&
+        MO2 == LoongArchII::MO_IE_PC64_LO)
+      return true;
+    break;
+  }
+  case LoongArch::ADDI_D: {
+    auto Lu32I = std::next(MII);
+    if (Lu32I == MIE || Lu32I->getOpcode() != LoongArch::LU32I_D)
+      break;
+    auto MO0 = MI.getOperand(2).getTargetFlags();
+    auto MO1 = Lu32I->getOperand(2).getTargetFlags();
+    if (MO0 == LoongArchII::MO_PCREL_LO && MO1 == LoongArchII::MO_PCREL64_LO)
+      return true;
+    if (MO0 == LoongArchII::MO_GOT_PC_LO && MO1 == LoongArchII::MO_GOT_PC64_LO)
+      return true;
+    if (MO0 == LoongArchII::MO_IE_PC_LO && MO1 == LoongArchII::MO_IE_PC64_LO)
+      return true;
+    break;
+  }
+  case LoongArch::LU32I_D: {
+    auto MO = MI.getOperand(2).getTargetFlags();
+    if (MO == LoongArchII::MO_PCREL64_LO || MO == LoongArchII::MO_GOT_PC64_LO ||
+        MO == LoongArchII::MO_IE_PC64_LO)
+      return true;
+    break;
+  }
+  case LoongArch::LU52I_D: {
+    auto MO = MI.getOperand(2).getTargetFlags();
+    if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI ||
+        MO == LoongArchII::MO_IE_PC64_HI)
+      return true;
+    break;
+  }
+  default:
+    break;
+  }
+
+  return false;
+}
+
 unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                           int *BytesRemoved) const {
   if (BytesRemoved)
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index eb19051e380ca..d66b2cb8efb33 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -64,6 +64,10 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
 
+  bool isSchedulingBoundary(const MachineInstr &MI,
+                            const MachineBasicBlock *MBB,
+                            const MachineFunction &MF) const override;
+
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;
 
diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll
index 4b2b72afaee17..7bc7a982db86d 100644
--- a/llvm/test/CodeGen/LoongArch/code-models.ll
+++ b/llvm/test/CodeGen/LoongArch/code-models.ll
@@ -33,11 +33,11 @@ define i32 @call_globaladdress(i32 %a) nounwind {
 ; LARGE:       # %bb.0:
 ; LARGE-NEXT:    addi.d $sp, $sp, -16
 ; LARGE-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(callee)
-; LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(callee)
-; LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(callee)
-; LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(callee)
-; LARGE-NEXT:    ldx.d $ra, $t8, $ra
+; LARGE-NEXT:    pcalau12i $a1, %got_pc_hi20(callee)
+; LARGE-NEXT:    addi.d $ra, $zero, %got_pc_lo12(callee)
+; LARGE-NEXT:    lu32i.d $ra, %got64_pc_lo20(callee)
+; LARGE-NEXT:    lu52i.d $ra, $ra, %got64_pc_hi12(callee)
+; LARGE-NEXT:    ldx.d $ra, $ra, $a1
 ; LARGE-NEXT:    jirl $ra, $ra, 0
 ; LARGE-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LARGE-NEXT:    addi.d $sp, $sp, 16
@@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) {
 ; LARGE-NEXT:    .cfi_offset 1, -8
 ; LARGE-NEXT:    ori $a2, $zero, 1000
 ; LARGE-NEXT:    move $a1, $zero
-; LARGE-NEXT:    pcalau12i $ra, %pc_hi20(memset)
-; LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(memset)
-; LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(memset)
-; LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(memset)
-; LARGE-NEXT:    add.d $ra, $t8, $ra
+; LARGE-NEXT:    pcalau12i $a3, %pc_hi20(memset)
+; LARGE-NEXT:    addi.d $ra, $zero, %pc_lo12(memset)
+; LARGE-NEXT:    lu32i.d $ra, %pc64_lo20(memset)
+; LARGE-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(memset)
+; LARGE-NEXT:    add.d $ra, $ra, $a3
 ; LARGE-NEXT:    jirl $ra, $ra, 0
 ; LARGE-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LARGE-NEXT:    addi.d $sp, $sp, 16
@@ -105,17 +105,17 @@ define i32 @caller_tail(i32 %i) nounwind {
 ;
 ; MEDIUM-LABEL: caller_tail:
 ; MEDIUM:       # %bb.0: # %entry
-; MEDIUM-NEXT:    pcaddu18i $t8, %call36(callee_tail)
-; MEDIUM-NEXT:    jr $t8
+; MEDIUM-NEXT:    pcaddu18i $a1, %call36(callee_tail)
+; MEDIUM-NEXT:    jr $a1
 ;
 ; LARGE-LABEL: caller_tail:
 ; LARGE:       # %bb.0: # %entry
-; LARGE-NEXT:    pcalau12i $t7, %got_pc_hi20(callee_tail)
-; LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(callee_tail)
-; LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(callee_tail)
-; LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail)
-; LARGE-NEXT:    ldx.d $t7, $t8, $t7
-; LARGE-NEXT:    jr $t7
+; LARGE-NEXT:    pcalau12i $a1, %got_pc_hi20(callee_tail)
+; LARGE-NEXT:    addi.d $a2, $zero, %got_pc_lo12(callee_tail)
+; LARGE-NEXT:    lu32i.d $a2, %got64_pc_lo20(callee_tail)
+; LARGE-NEXT:    lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail)
+; LARGE-NEXT:    ldx.d $a1, $a2, $a1
+; LARGE-NEXT:    jr $a1
 entry:
   %r = tail call i32 @callee_tail(i32 %i)
   ret i32 %r
diff --git a/llvm/test/CodeGen/LoongArch/expand-call.ll b/llvm/test/CodeGen/LoongArch/expand-call.ll
index 8c21adbcbb55c..d221200401bc5 100644
--- a/llvm/test/CodeGen/LoongArch/expand-call.ll
+++ b/llvm/test/CodeGen/LoongArch/expand-call.ll
@@ -1,6 +1,6 @@
 ; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-before loongarch-prera-expand-pseudo \
 ; RUN:     --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND
-; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-before machine-opt-remark-emitter \
+; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \
 ; RUN:     --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND
 
 declare void @callee()
diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll
index fb2929572f31a..2423dd81a4d3a 100644
--- a/llvm/test/CodeGen/LoongArch/global-address.ll
+++ b/llvm/test/CodeGen/LoongArch/global-address.ll
@@ -53,32 +53,32 @@ define void @foo() nounwind {
 ; LA64LARGENOPIC-LABEL: foo:
 ; LA64LARGENOPIC:       # %bb.0:
 ; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %got_pc_hi20(G)
-; LA64LARGENOPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(G)
-; LA64LARGENOPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(G)
-; LA64LARGENOPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(G)
-; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(G)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(G)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(G)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
 ; LA64LARGENOPIC-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %pc_hi20(g)
-; LA64LARGENOPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(g)
-; LA64LARGENOPIC-NEXT:    lu32i.d $t8, %pc64_lo20(g)
-; LA64LARGENOPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(g)
-; LA64LARGENOPIC-NEXT:    add.d $a0, $t8, $a0
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %pc_lo12(g)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %pc64_lo20(g)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g)
+; LA64LARGENOPIC-NEXT:    add.d $a0, $a1, $a0
 ; LA64LARGENOPIC-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGENOPIC-NEXT:    ret
 ;
 ; LA64LARGEPIC-LABEL: foo:
 ; LA64LARGEPIC:       # %bb.0:
 ; LA64LARGEPIC-NEXT:    pcalau12i $a0, %got_pc_hi20(G)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(G)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(G)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(G)
-; LA64LARGEPIC-NEXT:    ldx.d $a0, $t8, $a0
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(G)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(G)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(G)
+; LA64LARGEPIC-NEXT:    ldx.d $a0, $a1, $a0
 ; LA64LARGEPIC-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGEPIC-NEXT:    pcalau12i $a0, %pc_hi20(.Lg$local)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(.Lg$local)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(.Lg$local)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
-; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %pc_lo12(.Lg$local)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %pc64_lo20(.Lg$local)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(.Lg$local)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a1, $a0
 ; LA64LARGEPIC-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGEPIC-NEXT:    ret
   %V = load volatile i32, ptr @G
diff --git a/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll b/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll
index 277b0b9061398..2b7a862ecde11 100644
--- a/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll
+++ b/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll
@@ -20,10 +20,10 @@ define dso_local signext i32 @local_large() #0 {
 ; CHECK-LABEL: local_large:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(b)
-; CHECK-NEXT:    addi.d $t8, $zero, %pc_lo12(b)
-; CHECK-NEXT:    lu32i.d $t8, %pc64_lo20(b)
-; CHECK-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(b)
-; CHECK-NEXT:    add.d $a0, $t8, $a0
+; CHECK-NEXT:    addi.d $a1, $zero, %pc_lo12(b)
+; CHECK-NEXT:    lu32i.d $a1, %pc64_lo20(b)
+; CHECK-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(b)
+; CHECK-NEXT:    add.d $a0, $a1, $a0
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    ret
   %1 = load i32, ptr @b, align 4
diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
index ed1a24e82b4e4..5248468b6027d 100644
--- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
@@ -41,12 +41,12 @@ define void @test_la_pcrel(i32 signext %n) {
 ;
 ; LA64LARGE-LABEL: test_la_pcrel:
 ; LA64LARGE:       # %bb.0: # %entry
-; LA64LARGE-NEXT:    move $a1, $zero
 ; LA64LARGE-NEXT:    pcalau12i $a2, %pc_hi20(l)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(l)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(l)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(l)
-; LA64LARGE-NEXT:    add.d $a2, $t8, $a2
+; LA64LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(l)
+; LA64LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(l)
+; LA64LARGE-NEXT:    lu52i.d $a3, $a1, %pc64_hi12(l)
+; LA64LARGE-NEXT:    move $a1, $zero
+; LA64LARGE-NEXT:    add.d $a2, $a3, $a2
 ; LA64LARGE-NEXT:    .p2align 4, , 16
 ; LA64LARGE-NEXT:  .LBB0_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -103,10 +103,10 @@ define void @test_la_got(i32 signext %n) {
 ; LA64LARGE-LABEL: test_la_got:
 ; LA64LARGE:       # %bb.0: # %entry
 ; LA64LARGE-NEXT:    pcalau12i $a1, %got_pc_hi20(g)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(g)
-; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(g)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(g)
-; LA64LARGE-NEXT:    ldx.d $a1, $t8, $a1
+; LA64LARGE-NEXT:    addi.d $a2, $zero, %got_pc_lo12(g)
+; LA64LARGE-NEXT:    lu32i.d $a2, %got64_pc_lo20(g)
+; LA64LARGE-NEXT:    lu52i.d $a2, $a2, %got64_pc_hi12(g)
+; LA64LARGE-NEXT:    ldx.d $a1, $a2, $a1
 ; LA64LARGE-NEXT:    move $a2, $zero
 ; LA64LARGE-NEXT:    .p2align 4, , 16
 ; LA64LARGE-NEXT:  .LBB1_1: # %loop
@@ -165,10 +165,10 @@ define void @test_la_tls_ie(i32 signext %n) {
 ; LA64LARGE-LABEL: test_la_tls_ie:
 ; LA64LARGE:       # %bb.0: # %entry
 ; LA64LARGE-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
-; LA64LARGE-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
-; LA64LARGE-NEXT:    ldx.d $a1, $t8, $a1
+; LA64LARGE-NEXT:    addi.d $a2, $zero, %ie_pc_lo12(ie)
+; LA64LARGE-NEXT:    lu32i.d $a2, %ie64_pc_lo20(ie)
+; LA64LARGE-NEXT:    lu52i.d $a2, $a2, %ie64_pc_hi12(ie)
+; LA64LARGE-NEXT:    ldx.d $a1, $a2, $a1
 ; LA64LARGE-NEXT:    move $a2, $zero
 ; LA64LARGE-NEXT:    .p2align 4, , 16
 ; LA64LARGE-NEXT:  .LBB2_1: # %loop
@@ -272,21 +272,21 @@ define void @test_la_tls_ld(i32 signext %n) {
 ; LA64LARGE-NEXT:    .cfi_offset 23, -24
 ; LA64LARGE-NEXT:    .cfi_offset 24, -32
 ; LA64LARGE-NEXT:    move $fp, $a0
+; LA64LARGE-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
+; LA64LARGE-NEXT:    addi.d $a1, $zero, %got_pc_lo12(ld)
+; LA64LARGE-NEXT:    lu32i.d $a1, %got64_pc_lo20(ld)
+; LA64LARGE-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(ld)
 ; LA64LARGE-NEXT:    move $s1, $zero
-; LA64LARGE-NEXT:    pcalau12i $s0, %ld_pc_hi20(ld)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(ld)
-; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
-; LA64LARGE-NEXT:    add.d $s0, $t8, $s0
+; LA64LARGE-NEXT:    add.d $s0, $a1, $a0
 ; LA64LARGE-NEXT:    .p2align 4, , 16
 ; LA64LARGE-NEXT:  .LBB3_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA64LARGE-NEXT:    move $a0, $s0
-; LA64LARGE-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGE-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGE-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LA64LARGE-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LA64LARGE-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LA64LARGE-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LA64LARGE-NEXT:    add.d $ra, $ra, $a1
 ; LA64LARGE-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGE-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGE-NEXT:    addi.w $s1, $s1, 1
@@ -438,21 +438,21 @@ define void @test_la_tls_gd(i32 signext %n) nounwind {
 ; LA64LARGE-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; LA64LARGE-NEXT:    st.d $s1, $sp, 0 # 8-byte Folded Spill
 ; LA64LARGE-NEXT:    move $fp, $a0
+; LA64LARGE-NEXT:    pcalau12i $a0, %gd_pc_hi20(gd)
+; LA64LARGE-NEXT:    addi.d $a1, $zero, %got_pc_lo12(gd)
+; LA64LARGE-NEXT:    lu32i.d $a1, %got64_pc_lo20(gd)
+; LA64LARGE-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(gd)
 ; LA64LARGE-NEXT:    move $s1, $zero
-; LA64LARGE-NEXT:    pcalau12i $s0, %gd_pc_hi20(gd)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(gd)
-; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
-; LA64LARGE-NEXT:    add.d $s0, $t8, $s0
+; LA64LARGE-NEXT:    add.d $s0, $a1, $a0
 ; LA64LARGE-NEXT:    .p2align 4, , 16
 ; LA64LARGE-NEXT:  .LBB5_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA64LARGE-NEXT:    move $a0, $s0
-; LA64LARGE-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGE-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGE-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LA64LARGE-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LA64LARGE-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LA64LARGE-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LA64LARGE-NEXT:    add.d $ra, $ra, $a1
 ; LA64LARGE-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGE-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGE-NEXT:    addi.w $s1, $s1, 1
diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
index 6a15d3a9cda30..b03a523fb79a9 100644
--- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
@@ -82,54 +82,54 @@ define void @foo() nounwind {
 ; LARGE_NO_SCH-NEXT:    addi.d $sp, $sp, -16
 ; LARGE_NO_SCH-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %got_pc_hi20(G)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(G)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(G)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(G)
-; LARGE_NO_SCH-NEXT:    ldx.d $a0, $t8, $a0
+; LARGE_NO_SCH-NEXT:    addi.d $a1, $zero, %got_pc_lo12(G)
+; LARGE_NO_SCH-NEXT:    lu32i.d $a1, %got64_pc_lo20(G)
+; LARGE_NO_SCH-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(G)
+; LARGE_NO_SCH-NEXT:    ldx.d $a0, $a1, $a0
 ; LARGE_NO_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %pc_hi20(.Lg$local)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(.Lg$local)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(.Lg$local)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
-; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
+; LARGE_NO_SCH-NEXT:    addi.d $a1, $zero, %pc_lo12(.Lg$local)
+; LARGE_NO_SCH-NEXT:    lu32i.d $a1, %pc64_lo20(.Lg$local)
+; LARGE_NO_SCH-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(.Lg$local)
+; LARGE_NO_SCH-NEXT:    add.d $a0, $a1, $a0
 ; LARGE_NO_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_NO_SCH-NEXT:    ori $a0, $zero, 1
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(bar)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(bar)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(bar)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(bar)
-; LARGE_NO_SCH-NEXT:    ldx.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    pcalau12i $a1, %got_pc_hi20(bar)
+; LARGE_NO_SCH-NEXT:    addi.d $ra, $zero, %got_pc_lo12(bar)
+; LARGE_NO_SCH-NEXT:    lu32i.d $ra, %got64_pc_lo20(bar)
+; LARGE_NO_SCH-NEXT:    lu52i.d $ra, $ra, %got64_pc_hi12(bar)
+; LARGE_NO_SCH-NEXT:    ldx.d $ra, $ra, $a1
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %gd_pc_hi20(gd)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(gd)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
-; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    addi.d $a1, $zero, %got_pc_lo12(gd)
+; LARGE_NO_SCH-NEXT:    lu32i.d $a1, %got64_pc_lo20(gd)
+; LARGE_NO_SCH-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(gd)
+; LARGE_NO_SCH-NEXT:    add.d $a0, $a1, $a0
+; LARGE_NO_SCH-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    add.d $ra, $ra, $a1
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_NO_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(ld)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
-; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    addi.d $a1, $zero, %got_pc_lo12(ld)
+; LARGE_NO_SCH-NEXT:    lu32i.d $a1, %got64_pc_lo20(ld)
+; LARGE_NO_SCH-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(ld)
+; LARGE_NO_SCH-NEXT:    add.d $a0, $a1, $a0
+; LARGE_NO_SCH-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    add.d $ra, $ra, $a1
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
-; LARGE_NO_SCH-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
-; LARGE_NO_SCH-NEXT:    ldx.d $a1, $t8, $a1
 ; LARGE_NO_SCH-NEXT:    ld.d $zero, $a0, 0
-; LARGE_NO_SCH-NEXT:    ldx.d $zero, $a1, $tp
+; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LARGE_NO_SCH-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; LARGE_NO_SCH-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; LARGE_NO_SCH-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; LARGE_NO_SCH-NEXT:    ldx.d $a0, $a1, $a0
+; LARGE_NO_SCH-NEXT:    ldx.d $zero, $a0, $tp
 ; LARGE_NO_SCH-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LARGE_NO_SCH-NEXT:    addi.d $sp, $sp, 16
 ; LARGE_NO_SCH-NEXT:    ret
@@ -139,54 +139,54 @@ define void @foo() nounwind {
 ; LARGE_SCH-NEXT:    addi.d $sp, $sp, -16
 ; LARGE_SCH-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LARGE_SCH-NEXT:    pcalau12i $a0, %got_pc_hi20(G)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(G)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(G)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(G)
-; LARGE_SCH-NEXT:    ldx.d $a0, $t8, $a0
+; LARGE_SCH-NEXT:    addi.d $a1, $zero, %got_pc_lo12(G)
+; LARGE_SCH-NEXT:    lu32i.d $a1, %got64_pc_lo20(G)
+; LARGE_SCH-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(G)
+; LARGE_SCH-NEXT:    ldx.d $a0, $a1, $a0
 ; LARGE_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a0, %pc_hi20(.Lg$local)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(.Lg$local)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(.Lg$local)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
-; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
+; LARGE_SCH-NEXT:    addi.d $a1, $zero, %pc_lo12(.Lg$local)
+; LARGE_SCH-NEXT:    lu32i.d $a1, %pc64_lo20(.Lg$local)
+; LARGE_SCH-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(.Lg$local)
+; LARGE_SCH-NEXT:    add.d $a0, $a1, $a0
 ; LARGE_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_SCH-NEXT:    ori $a0, $zero, 1
-; LARGE_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(bar)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(bar)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(bar)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(bar)
-; LARGE_SCH-NEXT:    ldx.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    pcalau12i $a1, %got_pc_hi20(bar)
+; LARGE_SCH-NEXT:    addi.d $ra, $zero, %got_pc_lo12(bar)
+; LARGE_SCH-NEXT:    lu32i.d $ra, %got64_pc_lo20(bar)
+; LARGE_SCH-NEXT:    lu52i.d $ra, $ra, %got64_pc_hi12(bar)
+; LARGE_SCH-NEXT:    ldx.d $ra, $ra, $a1
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a0, %gd_pc_hi20(gd)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(gd)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
-; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    addi.d $a1, $zero, %got_pc_lo12(gd)
+; LARGE_SCH-NEXT:    lu32i.d $a1, %got64_pc_lo20(gd)
+; LARGE_SCH-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(gd)
+; LARGE_SCH-NEXT:    add.d $a0, $a1, $a0
+; LARGE_SCH-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LARGE_SCH-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LARGE_SCH-NEXT:    add.d $ra, $ra, $a1
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(ld)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
-; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    addi.d $a1, $zero, %got_pc_lo12(ld)
+; LARGE_SCH-NEXT:    lu32i.d $a1, %got64_pc_lo20(ld)
+; LARGE_SCH-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(ld)
+; LARGE_SCH-NEXT:    add.d $a0, $a1, $a0
+; LARGE_SCH-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LARGE_SCH-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LARGE_SCH-NEXT:    add.d $ra, $ra, $a1
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
-; LARGE_SCH-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
-; LARGE_SCH-NEXT:    ldx.d $a1, $t8, $a1
 ; LARGE_SCH-NEXT:    ld.d $zero, $a0, 0
-; LARGE_SCH-NEXT:    ldx.d $zero, $a1, $tp
+; LARGE_SCH-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LARGE_SCH-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; LARGE_SCH-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; LARGE_SCH-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; LARGE_SCH-NEXT:    ldx.d $a0, $a1, $a0
+; LARGE_SCH-NEXT:    ldx.d $zero, $a0, $tp
 ; LARGE_SCH-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LARGE_SCH-NEXT:    addi.d $sp, $sp, 16
 ; LARGE_SCH-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll
index bb89794d1c843..4ac6201fdd9d4 100644
--- a/llvm/test/CodeGen/LoongArch/tls-models.ll
+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll
@@ -51,15 +51,15 @@ define ptr @f1() nounwind {
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, -16
 ; LA64LARGEPIC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64LARGEPIC-NEXT:    pcalau12i $a0, %gd_pc_hi20(unspecified)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(unspecified)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(unspecified)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(unspecified)
-; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(unspecified)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(unspecified)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(unspecified)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    add.d $ra, $ra, $a1
 ; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16
@@ -82,10 +82,10 @@ define ptr @f1() nounwind {
 ; LA64LARGENOPIC-LABEL: f1:
 ; LA64LARGENOPIC:       # %bb.0: # %entry
 ; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(unspecified)
-; LA64LARGENOPIC-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(unspecified)
-; LA64LARGENOPIC-NEXT:    lu32i.d $t8, %ie64_pc_lo20(unspecified)
-; LA64LARGENOPIC-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(unspecified)
-; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(unspecified)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(unspecified)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(unspecified)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
 ;
@@ -120,14 +120,13 @@ define ptr @f1() nounwind {
 ; DESC64-NEXT:    addi.d $sp, $sp, -16
 ; DESC64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; DESC64-NEXT:    pcalau12i $a0, %desc_pc_hi20(unspecified)
-; DESC64-NEXT:    addi.d $t8, $zero, %desc_pc_lo12(unspecified)
-; DESC64-NEXT:    lu32i.d $t8, %desc64_pc_lo20(unspecified)
-; DESC64-NEXT:    lu52i.d $t8, $t8, %desc64_pc_hi12(unspecified)
-; DESC64-NEXT:    add.d $a0, $t8, $a0
+; DESC64-NEXT:    addi.d $a1, $zero, %desc_pc_lo12(unspecified)
+; DESC64-NEXT:    lu32i.d $a1, %desc64_pc_lo20(unspecified)
+; DESC64-NEXT:    lu52i.d $a1, $a1, %desc64_pc_hi12(unspecified)
+; DESC64-NEXT:    add.d $a0, $a0, $a1
 ; DESC64-NEXT:    ld.d $ra, $a0, %desc_ld(unspecified)
 ; DESC64-NEXT:    jirl $ra, $ra, %desc_call(unspecified)
-; DESC64-NEXT:    add.d $a1, $a0, $tp
-; DESC64-NEXT:    move $a0, $a1
+; DESC64-NEXT:    add.d $a0, $a0, $tp
 ; DESC64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; DESC64-NEXT:    addi.d $sp, $sp, 16
 ; DESC64-NEXT:    ret
@@ -165,15 +164,15 @@ define ptr @f2() nounwind {
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, -16
 ; LA64LARGEPIC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64LARGEPIC-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(ld)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
-; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(ld)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(ld)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(ld)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    add.d $ra, $ra, $a1
 ; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16
@@ -196,10 +195,10 @@ define ptr @f2() nounwind {
 ; LA64LARGENOPIC-LABEL: f2:
 ; LA64LARGENOPIC:       # %bb.0: # %entry
 ; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ld)
-; LA64LARGENOPIC-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ld)
-; LA64LARGENOPIC-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ld)
-; LA64LARGENOPIC-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ld)
-; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ld)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ld)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ld)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
 ;
@@ -234,14 +233,13 @@ define ptr @f2() nounwind {
 ; DESC64-NEXT:    addi.d $sp, $sp, -16
 ; DESC64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; DESC64-NEXT:    pcalau12i $a0, %desc_pc_hi20(ld)
-; DESC64-NEXT:    addi.d $t8, $zero, %desc_pc_lo12(ld)
-; DESC64-NEXT:    lu32i.d $t8, %desc64_pc_lo20(ld)
-; DESC64-NEXT:    lu52i.d $t8, $t8, %desc64_pc_hi12(ld)
-; DESC64-NEXT:    add.d $a0, $t8, $a0
+; DESC64-NEXT:    addi.d $a1, $zero, %desc_pc_lo12(ld)
+; DESC64-NEXT:    lu32i.d $a1, %desc64_pc_lo20(ld)
+; DESC64-NEXT:    lu52i.d $a1, $a1, %desc64_pc_hi12(ld)
+; DESC64-NEXT:    add.d $a0, $a0, $a1
 ; DESC64-NEXT:    ld.d $ra, $a0, %desc_ld(ld)
 ; DESC64-NEXT:    jirl $ra, $ra, %desc_call(ld)
-; DESC64-NEXT:    add.d $a1, $a0, $tp
-; DESC64-NEXT:    move $a0, $a1
+; DESC64-NEXT:    add.d $a0, $a0, $tp
 ; DESC64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; DESC64-NEXT:    addi.d $sp, $sp, 16
 ; DESC64-NEXT:    ret
@@ -269,10 +267,10 @@ define ptr @f3() nounwind {
 ; LA64LARGEPIC-LABEL: f3:
 ; LA64LARGEPIC:       # %bb.0: # %entry
 ; LA64LARGEPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
-; LA64LARGEPIC-NEXT:    ldx.d $a0, $t8, $a0
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; LA64LARGEPIC-NEXT:    ldx.d $a0, $a1, $a0
 ; LA64LARGEPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGEPIC-NEXT:    ret
 ;
@@ -293,10 +291,10 @@ define ptr @f3() nounwind {
 ; LA64LARGENOPIC-LABEL: f3:
 ; LA64LARGENOPIC:       # %bb.0: # %entry
 ; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
-; LA64LARGENOPIC-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
-; LA64LARGENOPIC-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
-; LA64LARGENOPIC-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
-; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
 ;
@@ -317,10 +315,10 @@ define ptr @f3() nounwind {
 ; DESC64-LABEL: f3:
 ; DESC64:       # %bb.0: # %entry
 ; DESC64-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
-; DESC64-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
-; DESC64-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
-; DESC64-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
-; DESC64-NEXT:    ldx.d $a0, $t8, $a0
+; DESC64-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; DESC64-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; DESC64-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; DESC64-NEXT:    ldx.d $a0, $a1, $a0
 ; DESC64-NEXT:    add.d $a0, $a0, $tp
 ; DESC64-NEXT:    ret
 entry:



More information about the llvm-commits mailing list