[llvm] [RISCV] Generate MIPS load/store pair instructions (PR #124717)

Djordje Todorovic via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 12 01:53:47 PST 2025


https://github.com/djtodoro updated https://github.com/llvm/llvm-project/pull/124717

From f3006f8c8d761b67e8eb15a73fb60f7fa9342b10 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Mon, 27 Jan 2025 09:57:39 +0100
Subject: [PATCH 1/3] [RISCV] Generate MIPS load/store pair instructions

Introduce the RISCVLoadStoreOptimizer MIR pass that performs the
optimization. It bundles loads and stores that operate on
consecutive memory locations to take advantage of hardware
load/store bonding, or replaces them with ldp/sdp instructions.

This is part of the MIPS extensions for the p8700 CPU.

Generation of ldp/sdp instructions is off by default, since it is
beneficial only when optimizing for size (-Os) on the p8700 CPU.
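
For illustration (drawn from the llc output in the new test below),
two adjacent word loads from the same base register:

    lw s3, 0(a0)
    lw s2, 4(a0)

become a single paired load once +Xmipslsp and
-mips-riscv-load-store-pairs=1 are enabled:

    mips.lwp s3, s2, 0(a0)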
---
 llvm/lib/Target/RISCV/CMakeLists.txt          |   1 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |  40 ++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |   6 +
 .../Target/RISCV/RISCVLoadStoreOptimizer.cpp  | 364 +++++++++++++
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp      |   4 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |  15 +-
 llvm/test/CodeGen/RISCV/O0-pipeline.ll        |   1 +
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |   2 +
 llvm/test/CodeGen/RISCV/load-store-pair.ll    | 509 ++++++++++++++++++
 9 files changed, 941 insertions(+), 1 deletion(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
 create mode 100644 llvm/test/CodeGen/RISCV/load-store-pair.ll

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 9b23a5ab521c8..815b6ccf54fd0 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_target(RISCVCodeGen
   RISCVISelLowering.cpp
   RISCVLandingPadSetup.cpp
   RISCVMachineFunctionInfo.cpp
+  RISCVLoadStoreOptimizer.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1ec299e3c8cc0..b1e141b2a20ba 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2749,6 +2749,46 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
       .setMIFlags(MemI.getFlags());
 }
 
+bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case RISCV::SH:
+  case RISCV::LH:
+  case RISCV::LHU:
+  case RISCV::SW:
+  case RISCV::FSW:
+  case RISCV::LW:
+  case RISCV::FLW:
+  case RISCV::SD:
+  case RISCV::FSD:
+  case RISCV::LD:
+  case RISCV::FLD:
+    return true;
+  }
+}
+
+bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
+                                      const TargetRegisterInfo *TRI) {
+  // Don't mess with volatile/ordered accesses or unexpected operand counts.
+  if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+    return false;
+
+  if (LdSt.getOperand(1).isFI())
+    return true;
+
+  assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
+  // Can't pair if the instruction modifies the base register
+  // or is in update form, e.g. ld x5,8(x5).
+  if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI))
+    return false;
+
+  if (!LdSt.getOperand(2).isImm())
+    return false;
+
+  return true;
+}
+
 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
     int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 1c81719c767ec..64cfb243b7bdb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -300,6 +300,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
   analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
 
+  /// Return true if the given load or store opcode may be paired with another.
+  static bool isPairableLdStInstOpc(unsigned Opc);
+
+  static bool isLdStSafeToPair(const MachineInstr &LdSt,
+                               const TargetRegisterInfo *TRI);
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
new file mode 100644
index 0000000000000..1421ede686d26
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -0,0 +1,364 @@
+//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bundle loads and stores that operate on consecutive memory locations to
+// take advantage of hardware load/store bonding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-load-store-opt"
+#define RISCV_LOAD_STORE_OPT_NAME "RISCV Load / Store Optimizer"
+namespace {
+
+struct RISCVLoadStoreOpt : public MachineFunctionPass {
+  static char ID;
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
+
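+  // The pass runs after register allocation, so it only sees physical
+  // registers (hence the NoVRegs property below).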
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
+
+  // Find and pair load/store instructions.
+  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
+  // Convert load/store pairs to single instructions.
+  bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
+                            MachineBasicBlock::iterator Second);
+
+  // Scan the instructions looking for a load/store that can be combined
+  // with the current instruction into a load/store pair.
+  // Return the matching instruction if one is found, else MBB->end().
+  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
+                                               bool &MergeForward);
+
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, bool MergeForward);
+
+private:
+  AliasAnalysis *AA;
+  MachineRegisterInfo *MRI;
+  const RISCVInstrInfo *TII;
+  const RISCVRegisterInfo *TRI;
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+};
+} // end anonymous namespace
+
+char RISCVLoadStoreOpt::ID = 0;
+INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
+                false)
+
+bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+  const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
+  if (!Subtarget.useLoadStorePairs())
+    return false;
+
+  bool MadeChange = false;
+  TII = Subtarget.getInstrInfo();
+  TRI = Subtarget.getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+
+  for (MachineBasicBlock &MBB : Fn) {
+    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+         MBBI != E;) {
+      if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+          tryToPairLdStInst(MBBI))
+        MadeChange = true;
+      else
+        ++MBBI;
+    }
+  }
+  return MadeChange;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+
+  if (!TII->isLdStSafeToPair(MI, TRI))
+    return false;
+
+  // Look ahead for a pairable instruction.
+  bool MergeForward;
+  MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
+  if (Paired != E) {
+    MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+    return true;
+  }
+  return false;
+}
+
+bool RISCVLoadStoreOpt::tryConvertToLdStPair(
+    MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+  unsigned PairOpc;
+  // TODO: Handle the rest from RISCVInstrInfo::isPairableLdStInstOpc.
+  switch (First->getOpcode()) {
+  default:
+    return false;
+  case RISCV::SW:
+    PairOpc = RISCV::SWP;
+    break;
+  case RISCV::LW:
+    PairOpc = RISCV::LWP;
+    break;
+  case RISCV::SD:
+    PairOpc = RISCV::SDP;
+    break;
+  case RISCV::LD:
+    PairOpc = RISCV::LDP;
+    break;
+  }
+
+  MachineFunction *MF = First->getMF();
+  const MachineMemOperand *MMO = *First->memoperands_begin();
+  Align MMOAlign = MMO->getAlign();
+  if (const PseudoSourceValue *Source = MMO->getPseudoValue())
+    if (Source->kind() == PseudoSourceValue::FixedStack)
+      MMOAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
+
+  if (MMOAlign < Align(MMO->getSize().getValue() * 2))
+    return false;
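+  // The offset must fit in a 7-bit unsigned field and be aligned to the
+  // access size, per the checks below.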
+  int64_t Offset = First->getOperand(2).getImm();
+  if (!isUInt<7>(Offset) ||
+      !isAligned(Align(MMO->getSize().getValue()), Offset))
+    return false;
+  MachineInstrBuilder MIB = BuildMI(
+      *MF,
+      First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(),
+      TII->get(PairOpc));
+  MIB.add(First->getOperand(0))
+      .add(Second->getOperand(0))
+      .add(First->getOperand(1))
+      .add(First->getOperand(2))
+      .cloneMergedMemRefs({&*First, &*Second});
+
+  First->getParent()->insert(First, MIB);
+
+  First->removeFromParent();
+  Second->removeFromParent();
+
+  return true;
+}
+
+static bool mayAlias(MachineInstr &MIa,
+                     SmallVectorImpl<MachineInstr *> &MemInsns,
+                     AliasAnalysis *AA) {
+  for (MachineInstr *MIb : MemInsns)
+    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+      return true;
+
+  return false;
+}
+
+// Scan the instructions looking for a load/store that can be combined with the
+// current instruction into a load/store pair.
+// TODO: Extend pairing logic to consider reordering both instructions
+// to a safe "middle" position rather than only merging forward/backward.
+// This requires more sophisticated checks for aliasing, register
+// liveness, and potential scheduling hazards.
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
+                                    bool &MergeForward) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator MBBI = I;
+  MachineInstr &FirstMI = *I;
+  MBBI = next_nodbg(MBBI, E);
+
+  bool MayLoad = FirstMI.mayLoad();
+  Register Reg = FirstMI.getOperand(0).getReg();
+  Register BaseReg = FirstMI.getOperand(1).getReg();
+  int Offset = FirstMI.getOperand(2).getImm();
+  int OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
+
+  MergeForward = false;
+
+  // Track which register units have been modified and used between the first
+  // insn (inclusive) and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  // Remember any instructions that read/write memory between FirstMI and MI.
+  SmallVector<MachineInstr *, 4> MemInsns;
+
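+  // Bound the forward search to 128 non-transient instructions to keep
+  // compile time under control.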
+  for (unsigned Count = 0; MBBI != E && Count < 128;
+       MBBI = next_nodbg(MBBI, E)) {
+    MachineInstr &MI = *MBBI;
+
+    // Don't count transient instructions towards the search limit since there
+    // may be different numbers of them if e.g. debug information is present.
+    if (!MI.isTransient())
+      ++Count;
+
+    if (MI.getOpcode() == FirstMI.getOpcode() &&
+        TII->isLdStSafeToPair(MI, TRI)) {
+      Register MIBaseReg = MI.getOperand(1).getReg();
+      int MIOffset = MI.getOperand(2).getImm();
+
+      if (BaseReg == MIBaseReg) {
+
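+        // The two offsets must be exactly one access width apart, in either
+        // direction, for the accesses to be consecutive.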
+        if ((Offset != MIOffset + OffsetStride) &&
+            (Offset + OffsetStride != MIOffset)) {
+          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                            TRI);
+          MemInsns.push_back(&MI);
+          continue;
+        }
+
+        // If the destination register of one load is the same as, or a
+        // sub/super register of, the other's destination, bail and keep looking.
+        if (MayLoad &&
+            TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
+          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                            TRI);
+          MemInsns.push_back(&MI);
+          continue;
+        }
+
+        // If the BaseReg has been modified, then we cannot do the optimization.
+        if (!ModifiedRegUnits.available(BaseReg))
+          return E;
+
+        // If the Rt of the second instruction was not modified or used between
+        // the two instructions and none of the instructions between the second
+        // and first alias with the second, we can combine the second into the
+        // first.
+        if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
+            !(MI.mayLoad() &&
+              !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
+            !mayAlias(MI, MemInsns, AA)) {
+
+          MergeForward = false;
+          return MBBI;
+        }
+
+        // Likewise, if the Rt of the first instruction is not modified or used
+        // between the two instructions and none of the instructions between the
+        // first and the second alias with the first, we can combine the first
+        // into the second.
+        if (!(MayLoad &&
+              !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
+            !mayAlias(FirstMI, MemInsns, AA)) {
+
+          if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
+            MergeForward = true;
+            return MBBI;
+          }
+        }
+        // Unable to combine these instructions due to interference in between.
+        // Keep looking.
+      }
+    }
+
+    // The instruction wasn't a matching load or store. Stop searching if we
+    // encounter a call instruction that might modify memory.
+    if (MI.isCall())
+      return E;
+
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+    // Otherwise, if the base register is modified, we have no match, so
+    // return early.
+    if (!ModifiedRegUnits.available(BaseReg))
+      return E;
+
+    // Update list of instructions that read/write memory.
+    if (MI.mayLoadOrStore())
+      MemInsns.push_back(&MI);
+  }
+  return E;
+}
+
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                                    MachineBasicBlock::iterator Paired,
+                                    bool MergeForward) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+
+  // Insert our new paired instruction after whichever of the paired
+  // instructions MergeForward indicates.
+  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+  MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
+  int Offset = I->getOperand(2).getImm();
+  int PairedOffset = Paired->getOperand(2).getImm();
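+  // Place the lower-addressed access first; the XOR accounts for the
+  // insertion point switching to Paired when merging forward.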
+  bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
+
+  if (!MergeForward)
+    Paired->getOperand(1).setIsKill(false);
+
+  // Kill flags may become invalid when moving stores for pairing.
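+  // (Operand 0 is a use for stores, the data register, and a def for loads,
+  // so this branch only applies to stores.)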
+  if (I->getOperand(0).isUse()) {
+    if (!MergeForward) {
+      // Clear kill flags on store if moving upwards.
+      I->getOperand(0).setIsKill(false);
+      Paired->getOperand(0).setIsKill(false);
+    } else {
+      // Clear kill flags of the first store's register.
+      Register Reg = I->getOperand(0).getReg();
+      for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
+        MI.clearRegisterKills(Reg, TRI);
+    }
+  }
+
+  MachineInstr *ToInsert = DeletionPoint->removeFromParent();
+  MachineBasicBlock &MBB = *InsertionPoint->getParent();
+  MachineBasicBlock::iterator First, Second;
+
+  if (!InsertAfter) {
+    First = MBB.insert(InsertionPoint, ToInsert);
+    Second = InsertionPoint;
+  } else {
+    Second = MBB.insertAfter(InsertionPoint, ToInsert);
+    First = InsertionPoint;
+  }
+
+  if (!tryConvertToLdStPair(First, Second))
+    finalizeBundle(MBB, First.getInstrIterator(),
+                   std::next(Second).getInstrIterator());
+
+  LLVM_DEBUG(dbgs() << "Bonding pair load/store:\n    ");
+  LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+  return NextI;
+}
+
+// Returns an instance of the Load / Store Optimization pass.
+FunctionPass *llvm::createRISCVLoadStoreOptPass() {
+  return new RISCVLoadStoreOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 1b54c278820fc..d7439c50be076 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -248,6 +248,10 @@ void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
   }
 }
 
+bool RISCVSubtarget::useLoadStorePairs() const {
+  return UseMIPSLoadStorePairsOpt && HasVendorXMIPSLSP;
+}
+
 bool RISCVSubtarget::useCCMovInsn() const {
   return UseCCMovInsn && HasVendorXMIPSCMove;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 167dbb53c5950..c98c7891b045d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -144,6 +144,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVDAGToDAGISelLegacyPass(*PR);
   initializeRISCVMoveMergePass(*PR);
   initializeRISCVPushPopOptPass(*PR);
+  initializeRISCVLoadStoreOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT,
@@ -310,7 +311,12 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
 ScheduleDAGInstrs *
 RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
   ScheduleDAGMI *DAG = nullptr;
-  if (EnablePostMISchedLoadStoreClustering) {
+  const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+  bool EnableLoadStoreClusteringForLoadStorePairOpt =
+      !ST.getMacroFusions().empty() && ST.useLoadStorePairs();
+
+  if (EnablePostMISchedLoadStoreClustering ||
+      EnableLoadStoreClusteringForLoadStorePairOpt) {
     DAG = createGenericSchedPostRA(C);
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
@@ -550,6 +556,8 @@ void RISCVPassConfig::addPreSched2() {
 
   // Emit KCFI checks for indirect calls.
   addPass(createKCFIPass());
+  if (TM->getOptLevel() != CodeGenOptLevel::None)
+    addPass(createRISCVLoadStoreOptPass());
 }
 
 void RISCVPassConfig::addPreEmitPass() {
@@ -563,6 +571,11 @@ void RISCVPassConfig::addPreEmitPass() {
     addPass(createMachineCopyPropagationPass(true));
   addPass(&BranchRelaxationPassID);
   addPass(createRISCVMakeCompressibleOptPass());
+
+  // LoadStoreOptimizer creates bundles for load-store bonding.
+  addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
+    return MF.getSubtarget<RISCVSubtarget>().useLoadStorePairs();
+  }));
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index a50c303819f23..8512376631c5a 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -64,6 +64,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       RISC-V Make Compressible
+; CHECK-NEXT:       Unpack machine instruction bundles
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 2646dfeca4eb6..e903e04ed3ec2 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -181,6 +181,7 @@
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V post-regalloc pseudo instruction expansion pass
 ; CHECK-NEXT:       Insert KCFI indirect call checks
+; CHECK-NEXT:       RISCV Load / Store Optimizer
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       PostRA Machine Instruction Scheduler
@@ -194,6 +195,7 @@
 ; CHECK-NEXT:       Machine Copy Propagation Pass
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       RISC-V Make Compressible
+; CHECK-NEXT:       Unpack machine instruction bundles
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll
new file mode 100644
index 0000000000000..4f415e789c14b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll
@@ -0,0 +1,509 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32D
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+Xmipslsp -mips-riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I_PAIR
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d,+Xmipslsp -mips-riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32D_PAIR
+; RUN: llc -mtriple=riscv64 -mattr=+Xmipslsp -mips-riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I_PAIR
+; RUN: llc -mtriple=riscv64 -mcpu mips-p8700 -mattr=+Xmipslsp -mips-riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64P_8700
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d,+Xmipslsp -mips-riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64D_PAIR
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64D_8700
+
+define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testi:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw s2, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset s2, -4
+; RV32I-NEXT:    .cfi_offset s3, -8
+; RV32I-NEXT:    .cfi_offset s4, -12
+; RV32I-NEXT:    .cfi_offset s5, -16
+; RV32I-NEXT:    lw s3, 0(a0)
+; RV32I-NEXT:    lw s2, 4(a0)
+; RV32I-NEXT:    lw s5, 8(a0)
+; RV32I-NEXT:    lw s4, 12(a0)
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    lw s2, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    .cfi_restore s2
+; RV32I-NEXT:    .cfi_restore s3
+; RV32I-NEXT:    .cfi_restore s4
+; RV32I-NEXT:    .cfi_restore s5
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-NEXT:    ret
+;
+; RV32D-LABEL: testi:
+; RV32D:       # %bb.0: # %entry
+; RV32D-NEXT:    addi sp, sp, -16
+; RV32D-NEXT:    .cfi_def_cfa_offset 16
+; RV32D-NEXT:    sw s2, 12(sp) # 4-byte Folded Spill
+; RV32D-NEXT:    sw s3, 8(sp) # 4-byte Folded Spill
+; RV32D-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
+; RV32D-NEXT:    sw s5, 0(sp) # 4-byte Folded Spill
+; RV32D-NEXT:    .cfi_offset s2, -4
+; RV32D-NEXT:    .cfi_offset s3, -8
+; RV32D-NEXT:    .cfi_offset s4, -12
+; RV32D-NEXT:    .cfi_offset s5, -16
+; RV32D-NEXT:    lw s3, 0(a0)
+; RV32D-NEXT:    lw s2, 4(a0)
+; RV32D-NEXT:    lw s5, 8(a0)
+; RV32D-NEXT:    lw s4, 12(a0)
+; RV32D-NEXT:    #APP
+; RV32D-NEXT:    #NO_APP
+; RV32D-NEXT:    lw s2, 12(sp) # 4-byte Folded Reload
+; RV32D-NEXT:    lw s3, 8(sp) # 4-byte Folded Reload
+; RV32D-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
+; RV32D-NEXT:    lw s5, 0(sp) # 4-byte Folded Reload
+; RV32D-NEXT:    .cfi_restore s2
+; RV32D-NEXT:    .cfi_restore s3
+; RV32D-NEXT:    .cfi_restore s4
+; RV32D-NEXT:    .cfi_restore s5
+; RV32D-NEXT:    addi sp, sp, 16
+; RV32D-NEXT:    .cfi_def_cfa_offset 0
+; RV32D-NEXT:    ret
+;
+; RV64I-LABEL: testi:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    .cfi_def_cfa_offset 32
+; RV64I-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset s2, -8
+; RV64I-NEXT:    .cfi_offset s3, -16
+; RV64I-NEXT:    .cfi_offset s4, -24
+; RV64I-NEXT:    .cfi_offset s5, -32
+; RV64I-NEXT:    ld s3, 0(a0)
+; RV64I-NEXT:    ld s2, 8(a0)
+; RV64I-NEXT:    ld s5, 16(a0)
+; RV64I-NEXT:    ld s4, 24(a0)
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    .cfi_restore s2
+; RV64I-NEXT:    .cfi_restore s3
+; RV64I-NEXT:    .cfi_restore s4
+; RV64I-NEXT:    .cfi_restore s5
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    ret
+;
+; RV64D-LABEL: testi:
+; RV64D:       # %bb.0: # %entry
+; RV64D-NEXT:    addi sp, sp, -32
+; RV64D-NEXT:    .cfi_def_cfa_offset 32
+; RV64D-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT:    sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D-NEXT:    .cfi_offset s2, -8
+; RV64D-NEXT:    .cfi_offset s3, -16
+; RV64D-NEXT:    .cfi_offset s4, -24
+; RV64D-NEXT:    .cfi_offset s5, -32
+; RV64D-NEXT:    ld s3, 0(a0)
+; RV64D-NEXT:    ld s2, 8(a0)
+; RV64D-NEXT:    ld s5, 16(a0)
+; RV64D-NEXT:    ld s4, 24(a0)
+; RV64D-NEXT:    #APP
+; RV64D-NEXT:    #NO_APP
+; RV64D-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT:    ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D-NEXT:    .cfi_restore s2
+; RV64D-NEXT:    .cfi_restore s3
+; RV64D-NEXT:    .cfi_restore s4
+; RV64D-NEXT:    .cfi_restore s5
+; RV64D-NEXT:    addi sp, sp, 32
+; RV64D-NEXT:    .cfi_def_cfa_offset 0
+; RV64D-NEXT:    ret
+;
+; RV32I_PAIR-LABEL: testi:
+; RV32I_PAIR:       # %bb.0: # %entry
+; RV32I_PAIR-NEXT:    addi sp, sp, -16
+; RV32I_PAIR-NEXT:    .cfi_def_cfa_offset 16
+; RV32I_PAIR-NEXT:    mips.swp s3, s2, 8(sp) # 8-byte Folded Spill
+; RV32I_PAIR-NEXT:    mips.swp s5, s4, 0(sp) # 8-byte Folded Spill
+; RV32I_PAIR-NEXT:    .cfi_offset s2, -4
+; RV32I_PAIR-NEXT:    .cfi_offset s3, -8
+; RV32I_PAIR-NEXT:    .cfi_offset s4, -12
+; RV32I_PAIR-NEXT:    .cfi_offset s5, -16
+; RV32I_PAIR-NEXT:    mips.lwp s3, s2, 0(a0)
+; RV32I_PAIR-NEXT:    mips.lwp s5, s4, 8(a0)
+; RV32I_PAIR-NEXT:    #APP
+; RV32I_PAIR-NEXT:    #NO_APP
+; RV32I_PAIR-NEXT:    mips.lwp s3, s2, 8(sp) # 8-byte Folded Reload
+; RV32I_PAIR-NEXT:    mips.lwp s5, s4, 0(sp) # 8-byte Folded Reload
+; RV32I_PAIR-NEXT:    .cfi_restore s2
+; RV32I_PAIR-NEXT:    .cfi_restore s3
+; RV32I_PAIR-NEXT:    .cfi_restore s4
+; RV32I_PAIR-NEXT:    .cfi_restore s5
+; RV32I_PAIR-NEXT:    addi sp, sp, 16
+; RV32I_PAIR-NEXT:    .cfi_def_cfa_offset 0
+; RV32I_PAIR-NEXT:    ret
+;
+; RV32D_PAIR-LABEL: testi:
+; RV32D_PAIR:       # %bb.0: # %entry
+; RV32D_PAIR-NEXT:    addi sp, sp, -16
+; RV32D_PAIR-NEXT:    .cfi_def_cfa_offset 16
+; RV32D_PAIR-NEXT:    mips.swp s3, s2, 8(sp) # 8-byte Folded Spill
+; RV32D_PAIR-NEXT:    mips.swp s5, s4, 0(sp) # 8-byte Folded Spill
+; RV32D_PAIR-NEXT:    .cfi_offset s2, -4
+; RV32D_PAIR-NEXT:    .cfi_offset s3, -8
+; RV32D_PAIR-NEXT:    .cfi_offset s4, -12
+; RV32D_PAIR-NEXT:    .cfi_offset s5, -16
+; RV32D_PAIR-NEXT:    mips.lwp s3, s2, 0(a0)
+; RV32D_PAIR-NEXT:    mips.lwp s5, s4, 8(a0)
+; RV32D_PAIR-NEXT:    #APP
+; RV32D_PAIR-NEXT:    #NO_APP
+; RV32D_PAIR-NEXT:    mips.lwp s3, s2, 8(sp) # 8-byte Folded Reload
+; RV32D_PAIR-NEXT:    mips.lwp s5, s4, 0(sp) # 8-byte Folded Reload
+; RV32D_PAIR-NEXT:    .cfi_restore s2
+; RV32D_PAIR-NEXT:    .cfi_restore s3
+; RV32D_PAIR-NEXT:    .cfi_restore s4
+; RV32D_PAIR-NEXT:    .cfi_restore s5
+; RV32D_PAIR-NEXT:    addi sp, sp, 16
+; RV32D_PAIR-NEXT:    .cfi_def_cfa_offset 0
+; RV32D_PAIR-NEXT:    ret
+;
+; RV64I_PAIR-LABEL: testi:
+; RV64I_PAIR:       # %bb.0: # %entry
+; RV64I_PAIR-NEXT:    addi sp, sp, -32
+; RV64I_PAIR-NEXT:    .cfi_def_cfa_offset 32
+; RV64I_PAIR-NEXT:    mips.sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64I_PAIR-NEXT:    mips.sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64I_PAIR-NEXT:    .cfi_offset s2, -8
+; RV64I_PAIR-NEXT:    .cfi_offset s3, -16
+; RV64I_PAIR-NEXT:    .cfi_offset s4, -24
+; RV64I_PAIR-NEXT:    .cfi_offset s5, -32
+; RV64I_PAIR-NEXT:    ld s3, 0(a0)
+; RV64I_PAIR-NEXT:    ld s2, 8(a0)
+; RV64I_PAIR-NEXT:    ld s5, 16(a0)
+; RV64I_PAIR-NEXT:    ld s4, 24(a0)
+; RV64I_PAIR-NEXT:    #APP
+; RV64I_PAIR-NEXT:    #NO_APP
+; RV64I_PAIR-NEXT:    mips.ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64I_PAIR-NEXT:    mips.ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64I_PAIR-NEXT:    .cfi_restore s2
+; RV64I_PAIR-NEXT:    .cfi_restore s3
+; RV64I_PAIR-NEXT:    .cfi_restore s4
+; RV64I_PAIR-NEXT:    .cfi_restore s5
+; RV64I_PAIR-NEXT:    addi sp, sp, 32
+; RV64I_PAIR-NEXT:    .cfi_def_cfa_offset 0
+; RV64I_PAIR-NEXT:    ret
+;
+; RV64P_8700-LABEL: testi:
+; RV64P_8700:       # %bb.0: # %entry
+; RV64P_8700-NEXT:    addi sp, sp, -32
+; RV64P_8700-NEXT:    .cfi_def_cfa_offset 32
+; RV64P_8700-NEXT:    mips.sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64P_8700-NEXT:    mips.sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64P_8700-NEXT:    .cfi_offset s2, -8
+; RV64P_8700-NEXT:    .cfi_offset s3, -16
+; RV64P_8700-NEXT:    .cfi_offset s4, -24
+; RV64P_8700-NEXT:    .cfi_offset s5, -32
+; RV64P_8700-NEXT:    ld s3, 0(a0)
+; RV64P_8700-NEXT:    ld s2, 8(a0)
+; RV64P_8700-NEXT:    ld s5, 16(a0)
+; RV64P_8700-NEXT:    ld s4, 24(a0)
+; RV64P_8700-NEXT:    #APP
+; RV64P_8700-NEXT:    #NO_APP
+; RV64P_8700-NEXT:    mips.ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64P_8700-NEXT:    mips.ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64P_8700-NEXT:    .cfi_restore s2
+; RV64P_8700-NEXT:    .cfi_restore s3
+; RV64P_8700-NEXT:    .cfi_restore s4
+; RV64P_8700-NEXT:    .cfi_restore s5
+; RV64P_8700-NEXT:    addi sp, sp, 32
+; RV64P_8700-NEXT:    .cfi_def_cfa_offset 0
+; RV64P_8700-NEXT:    ret
+;
+; RV64D_PAIR-LABEL: testi:
+; RV64D_PAIR:       # %bb.0: # %entry
+; RV64D_PAIR-NEXT:    addi sp, sp, -32
+; RV64D_PAIR-NEXT:    .cfi_def_cfa_offset 32
+; RV64D_PAIR-NEXT:    mips.sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64D_PAIR-NEXT:    mips.sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64D_PAIR-NEXT:    .cfi_offset s2, -8
+; RV64D_PAIR-NEXT:    .cfi_offset s3, -16
+; RV64D_PAIR-NEXT:    .cfi_offset s4, -24
+; RV64D_PAIR-NEXT:    .cfi_offset s5, -32
+; RV64D_PAIR-NEXT:    ld s3, 0(a0)
+; RV64D_PAIR-NEXT:    ld s2, 8(a0)
+; RV64D_PAIR-NEXT:    ld s5, 16(a0)
+; RV64D_PAIR-NEXT:    ld s4, 24(a0)
+; RV64D_PAIR-NEXT:    #APP
+; RV64D_PAIR-NEXT:    #NO_APP
+; RV64D_PAIR-NEXT:    mips.ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64D_PAIR-NEXT:    mips.ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64D_PAIR-NEXT:    .cfi_restore s2
+; RV64D_PAIR-NEXT:    .cfi_restore s3
+; RV64D_PAIR-NEXT:    .cfi_restore s4
+; RV64D_PAIR-NEXT:    .cfi_restore s5
+; RV64D_PAIR-NEXT:    addi sp, sp, 32
+; RV64D_PAIR-NEXT:    .cfi_def_cfa_offset 0
+; RV64D_PAIR-NEXT:    ret
+;
+; RV64D_8700-LABEL: testi:
+; RV64D_8700:       # %bb.0: # %entry
+; RV64D_8700-NEXT:    addi sp, sp, -32
+; RV64D_8700-NEXT:    .cfi_def_cfa_offset 32
+; RV64D_8700-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT:    sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT:    .cfi_offset s2, -8
+; RV64D_8700-NEXT:    .cfi_offset s3, -16
+; RV64D_8700-NEXT:    .cfi_offset s4, -24
+; RV64D_8700-NEXT:    .cfi_offset s5, -32
+; RV64D_8700-NEXT:    ld s3, 0(a0)
+; RV64D_8700-NEXT:    ld s2, 8(a0)
+; RV64D_8700-NEXT:    ld s5, 16(a0)
+; RV64D_8700-NEXT:    ld s4, 24(a0)
+; RV64D_8700-NEXT:    #APP
+; RV64D_8700-NEXT:    #NO_APP
+; RV64D_8700-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT:    ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT:    .cfi_restore s2
+; RV64D_8700-NEXT:    .cfi_restore s3
+; RV64D_8700-NEXT:    .cfi_restore s4
+; RV64D_8700-NEXT:    .cfi_restore s5
+; RV64D_8700-NEXT:    addi sp, sp, 32
+; RV64D_8700-NEXT:    .cfi_def_cfa_offset 0
+; RV64D_8700-NEXT:    ret
+entry:
+  %arrayidx = getelementptr inbounds i8*, i8** %a, i64 1
+  %0 = load i8*, i8** %arrayidx, align 8
+  %1 = load i8*, i8** %a, align 8
+  %arrayidx2 = getelementptr inbounds i8*, i8** %a, i64 3
+  %2 = load i8*, i8** %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds i8*, i8** %a, i64 2
+  %3 = load i8*, i8** %arrayidx3, align 8
+  tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(i8* %0, i8* %1, i8* %2, i8* %3)
+  ret void
+}
+
+
+define dso_local void @testf(float* nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testf:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lw a3, 0(a0)
+; RV32I-NEXT:    lw a4, 4(a0)
+; RV32I-NEXT:    lw a2, 8(a0)
+; RV32I-NEXT:    lw a1, 12(a0)
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    tail sinkf
+;
+; RV32D-LABEL: testf:
+; RV32D:       # %bb.0: # %entry
+; RV32D-NEXT:    flw fa3, 0(a0)
+; RV32D-NEXT:    flw fa0, 4(a0)
+; RV32D-NEXT:    flw fa2, 8(a0)
+; RV32D-NEXT:    flw fa1, 12(a0)
+; RV32D-NEXT:    tail sinkf
+;
+; RV64I-LABEL: testf:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    lw a3, 0(a0)
+; RV64I-NEXT:    lw a4, 4(a0)
+; RV64I-NEXT:    lw a2, 8(a0)
+; RV64I-NEXT:    lw a1, 12(a0)
+; RV64I-NEXT:    mv a0, a4
+; RV64I-NEXT:    tail sinkf
+;
+; RV64D-LABEL: testf:
+; RV64D:       # %bb.0: # %entry
+; RV64D-NEXT:    flw fa3, 0(a0)
+; RV64D-NEXT:    flw fa0, 4(a0)
+; RV64D-NEXT:    flw fa2, 8(a0)
+; RV64D-NEXT:    flw fa1, 12(a0)
+; RV64D-NEXT:    tail sinkf
+;
+; RV32I_PAIR-LABEL: testf:
+; RV32I_PAIR:       # %bb.0: # %entry
+; RV32I_PAIR-NEXT:    lw a3, 0(a0)
+; RV32I_PAIR-NEXT:    lw a4, 4(a0)
+; RV32I_PAIR-NEXT:    lw a2, 8(a0)
+; RV32I_PAIR-NEXT:    lw a1, 12(a0)
+; RV32I_PAIR-NEXT:    mv a0, a4
+; RV32I_PAIR-NEXT:    tail sinkf
+;
+; RV32D_PAIR-LABEL: testf:
+; RV32D_PAIR:       # %bb.0: # %entry
+; RV32D_PAIR-NEXT:    flw fa3, 0(a0)
+; RV32D_PAIR-NEXT:    flw fa0, 4(a0)
+; RV32D_PAIR-NEXT:    flw fa2, 8(a0)
+; RV32D_PAIR-NEXT:    flw fa1, 12(a0)
+; RV32D_PAIR-NEXT:    tail sinkf
+;
+; RV64I_PAIR-LABEL: testf:
+; RV64I_PAIR:       # %bb.0: # %entry
+; RV64I_PAIR-NEXT:    lw a3, 0(a0)
+; RV64I_PAIR-NEXT:    lw a4, 4(a0)
+; RV64I_PAIR-NEXT:    lw a2, 8(a0)
+; RV64I_PAIR-NEXT:    lw a1, 12(a0)
+; RV64I_PAIR-NEXT:    mv a0, a4
+; RV64I_PAIR-NEXT:    tail sinkf
+;
+; RV64P_8700-LABEL: testf:
+; RV64P_8700:       # %bb.0: # %entry
+; RV64P_8700-NEXT:    flw fa3, 0(a0)
+; RV64P_8700-NEXT:    flw fa0, 4(a0)
+; RV64P_8700-NEXT:    flw fa2, 8(a0)
+; RV64P_8700-NEXT:    flw fa1, 12(a0)
+; RV64P_8700-NEXT:    tail sinkf
+;
+; RV64D_PAIR-LABEL: testf:
+; RV64D_PAIR:       # %bb.0: # %entry
+; RV64D_PAIR-NEXT:    flw fa3, 0(a0)
+; RV64D_PAIR-NEXT:    flw fa0, 4(a0)
+; RV64D_PAIR-NEXT:    flw fa2, 8(a0)
+; RV64D_PAIR-NEXT:    flw fa1, 12(a0)
+; RV64D_PAIR-NEXT:    tail sinkf
+;
+; RV64D_8700-LABEL: testf:
+; RV64D_8700:       # %bb.0: # %entry
+; RV64D_8700-NEXT:    flw fa3, 0(a0)
+; RV64D_8700-NEXT:    flw fa0, 4(a0)
+; RV64D_8700-NEXT:    flw fa2, 8(a0)
+; RV64D_8700-NEXT:    flw fa1, 12(a0)
+; RV64D_8700-NEXT:    tail sinkf
+entry:
+  %arrayidx = getelementptr inbounds float, float* %a, i64 1
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float, float* %a, i64 3
+  %1 = load float, float* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 2
+  %2 = load float, float* %arrayidx2, align 4
+  %3 = load float, float* %a, align 4
+  tail call void @sinkf(float noundef %0, float noundef %1, float noundef %2, float noundef %3)
+  ret void
+}
+
+declare dso_local void @sinkf(float noundef, float noundef, float noundef, float noundef) local_unnamed_addr
+
+define dso_local void @testd(double* nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testd:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lw a4, 16(a0)
+; RV32I-NEXT:    lw a5, 20(a0)
+; RV32I-NEXT:    lw a2, 24(a0)
+; RV32I-NEXT:    lw a3, 28(a0)
+; RV32I-NEXT:    lw a6, 0(a0)
+; RV32I-NEXT:    lw a7, 4(a0)
+; RV32I-NEXT:    lw t0, 8(a0)
+; RV32I-NEXT:    lw a1, 12(a0)
+; RV32I-NEXT:    mv a0, t0
+; RV32I-NEXT:    tail sinkd
+;
+; RV32D-LABEL: testd:
+; RV32D:       # %bb.0: # %entry
+; RV32D-NEXT:    fld fa3, 0(a0)
+; RV32D-NEXT:    fld fa0, 8(a0)
+; RV32D-NEXT:    fld fa2, 16(a0)
+; RV32D-NEXT:    fld fa1, 24(a0)
+; RV32D-NEXT:    tail sinkd
+;
+; RV64I-LABEL: testd:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    ld a3, 0(a0)
+; RV64I-NEXT:    ld a4, 8(a0)
+; RV64I-NEXT:    ld a2, 16(a0)
+; RV64I-NEXT:    ld a1, 24(a0)
+; RV64I-NEXT:    mv a0, a4
+; RV64I-NEXT:    tail sinkd
+;
+; RV64D-LABEL: testd:
+; RV64D:       # %bb.0: # %entry
+; RV64D-NEXT:    fld fa3, 0(a0)
+; RV64D-NEXT:    fld fa0, 8(a0)
+; RV64D-NEXT:    fld fa2, 16(a0)
+; RV64D-NEXT:    fld fa1, 24(a0)
+; RV64D-NEXT:    tail sinkd
+;
+; RV32I_PAIR-LABEL: testd:
+; RV32I_PAIR:       # %bb.0: # %entry
+; RV32I_PAIR-NEXT:    mips.lwp a4, a5, 16(a0)
+; RV32I_PAIR-NEXT:    mips.lwp a2, a3, 24(a0)
+; RV32I_PAIR-NEXT:    mips.lwp a6, a7, 0(a0)
+; RV32I_PAIR-NEXT:    mips.lwp a0, a1, 8(a0)
+; RV32I_PAIR-NEXT:    tail sinkd
+;
+; RV32D_PAIR-LABEL: testd:
+; RV32D_PAIR:       # %bb.0: # %entry
+; RV32D_PAIR-NEXT:    fld fa3, 0(a0)
+; RV32D_PAIR-NEXT:    fld fa0, 8(a0)
+; RV32D_PAIR-NEXT:    fld fa2, 16(a0)
+; RV32D_PAIR-NEXT:    fld fa1, 24(a0)
+; RV32D_PAIR-NEXT:    tail sinkd
+;
+; RV64I_PAIR-LABEL: testd:
+; RV64I_PAIR:       # %bb.0: # %entry
+; RV64I_PAIR-NEXT:    ld a3, 0(a0)
+; RV64I_PAIR-NEXT:    ld a4, 8(a0)
+; RV64I_PAIR-NEXT:    ld a2, 16(a0)
+; RV64I_PAIR-NEXT:    ld a1, 24(a0)
+; RV64I_PAIR-NEXT:    mv a0, a4
+; RV64I_PAIR-NEXT:    tail sinkd
+;
+; RV64P_8700-LABEL: testd:
+; RV64P_8700:       # %bb.0: # %entry
+; RV64P_8700-NEXT:    fld fa3, 0(a0)
+; RV64P_8700-NEXT:    fld fa0, 8(a0)
+; RV64P_8700-NEXT:    fld fa2, 16(a0)
+; RV64P_8700-NEXT:    fld fa1, 24(a0)
+; RV64P_8700-NEXT:    tail sinkd
+;
+; RV64D_PAIR-LABEL: testd:
+; RV64D_PAIR:       # %bb.0: # %entry
+; RV64D_PAIR-NEXT:    fld fa3, 0(a0)
+; RV64D_PAIR-NEXT:    fld fa0, 8(a0)
+; RV64D_PAIR-NEXT:    fld fa2, 16(a0)
+; RV64D_PAIR-NEXT:    fld fa1, 24(a0)
+; RV64D_PAIR-NEXT:    tail sinkd
+;
+; RV64D_8700-LABEL: testd:
+; RV64D_8700:       # %bb.0: # %entry
+; RV64D_8700-NEXT:    fld fa3, 0(a0)
+; RV64D_8700-NEXT:    fld fa0, 8(a0)
+; RV64D_8700-NEXT:    fld fa2, 16(a0)
+; RV64D_8700-NEXT:    fld fa1, 24(a0)
+; RV64D_8700-NEXT:    tail sinkd
+entry:
+  %arrayidx = getelementptr inbounds double, double* %a, i64 1
+  %0 = load double, double* %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds double, double* %a, i64 3
+  %1 = load double, double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 2
+  %2 = load double, double* %arrayidx2, align 8
+  %3 = load double, double* %a, align 8
+  tail call void @sinkd(double noundef %0, double noundef %1, double noundef %2, double noundef %3)
+  ret void
+}
+
+declare dso_local void @sinkd(double noundef, double noundef, double noundef, double noundef) local_unnamed_addr

From 23f0f313d8e5f6ef96fb7bac01df4efa5612a40f Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Tue, 28 Jan 2025 09:07:06 +0100
Subject: [PATCH 2/3] fixup: Alphabetize cmake file

---
 llvm/lib/Target/RISCV/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 815b6ccf54fd0..49d8aa04030b9 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -47,8 +47,8 @@ add_llvm_target(RISCVCodeGen
   RISCVISelDAGToDAG.cpp
   RISCVISelLowering.cpp
   RISCVLandingPadSetup.cpp
-  RISCVMachineFunctionInfo.cpp
   RISCVLoadStoreOptimizer.cpp
+  RISCVMachineFunctionInfo.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp

From 7e2c1432696b8f6dd91b2d76ee606e55e777a863 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Wed, 5 Feb 2025 10:43:24 +0100
Subject: [PATCH 3/3] fixup: Address comments

It was not obvious that bonding and pairing are two different
things, so the comments now make the distinction explicit. In
addition, mention the AArch64 optimization and this pass's
similarity to it.
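
For reference, the two behaviors are now controlled independently:
pairing via -mips-riscv-load-store-pairs (off by default) and bonding
via -mips-riscv-load-store-bonding (on by default); both also require
the XMipsLSP vendor extension. Mirroring the RUN lines in the test,
pairing can be exercised with:

    llc -mtriple=riscv64 -mattr=+Xmipslsp -mips-riscv-load-store-pairs=1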
---
 .../Target/RISCV/RISCVLoadStoreOptimizer.cpp  | 65 ++++++++++++++-----
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp      | 13 +++-
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |  1 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |  9 +--
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |  2 +-
 llvm/test/CodeGen/RISCV/load-store-pair.ll    |  1 +
 6 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index 1421ede686d26..c3568e76027a8 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -6,8 +6,25 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Bundle loads and stores that operate on consecutive memory locations to
-// take advantage of hardware load/store bonding.
+// This pass implements two key optimizations for RISC-V memory accesses:
+//   1. Load/Store Pairing: It identifies pairs of load or store instructions
+//      operating on consecutive memory locations and merges them into a single
+//      paired instruction, taking advantage of hardware support for paired
+//      accesses. Much of the pairing logic is adapted from the
+//      AArch64LoadStoreOpt pass.
+//   2. Load/Store Bonding: When direct pairing cannot be applied, the pass
+//      bonds related memory instructions together into a bundle. This
+//      preserves their proximity and prevents reordering that might violate
+//      memory semantics. This benefits certain targets (e.g. MIPS P8700) by
+//      ensuring that paired or bonded memory operations remain contiguous.
+//
+// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
+//       merging zero store instructions, promoting loads that read directly
+//       from a preceding store, and merging base register updates with
+//       load/store instructions (via pre-/post-indexed addressing). These
+//       advanced transformations are not yet implemented in the RISC-V pass but
+//       represent potential future enhancements, as similar benefits could be
+//       achieved on RISC-V architectures.
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,7 +39,7 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "riscv-load-store-opt"
-#define RISCV_LOAD_STORE_OPT_NAME "RISCV Load / Store Optimizer"
+#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
 namespace {
 
 struct RISCVLoadStoreOpt : public MachineFunctionPass {
@@ -66,6 +83,8 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
   const RISCVInstrInfo *TII;
   const RISCVRegisterInfo *TRI;
   LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+  bool EnableLoadStorePairs = false;
+  bool EnableLoadStoreBonding = false;
 };
 } // end anonymous namespace
 
@@ -77,7 +96,9 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   if (skipFunction(Fn.getFunction()))
     return false;
   const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
-  if (!Subtarget.useLoadStorePairs())
+  EnableLoadStorePairs = Subtarget.useLoadStorePairs();
+  EnableLoadStoreBonding = Subtarget.useMIPSLoadStoreBonding();
+  if (!EnableLoadStorePairs && !EnableLoadStoreBonding)
     return false;
 
   bool MadeChange = false;
@@ -107,12 +128,16 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
 // instruction.
 bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
-  MachineBasicBlock::iterator E = MI.getParent()->end();
+
+  // Volatile or otherwise ordered memory accesses are not candidates.
+  if (MI.hasOrderedMemoryRef())
+    return false;
 
   if (!TII->isLdStSafeToPair(MI, TRI))
     return false;
 
   // Look ahead for a pairable instruction.
+  MachineBasicBlock::iterator E = MI.getParent()->end();
   bool MergeForward;
   MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
   if (Paired != E) {
@@ -130,16 +155,16 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair(
   default:
     return false;
   case RISCV::SW:
-    PairOpc = RISCV::SWP;
+    PairOpc = RISCV::MIPS_SWP;
     break;
   case RISCV::LW:
-    PairOpc = RISCV::LWP;
+    PairOpc = RISCV::MIPS_LWP;
     break;
   case RISCV::SD:
-    PairOpc = RISCV::SDP;
+    PairOpc = RISCV::MIPS_SDP;
     break;
   case RISCV::LD:
-    PairOpc = RISCV::LDP;
+    PairOpc = RISCV::MIPS_LDP;
     break;
   }
 
@@ -201,8 +226,8 @@ RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   bool MayLoad = FirstMI.mayLoad();
   Register Reg = FirstMI.getOperand(0).getReg();
   Register BaseReg = FirstMI.getOperand(1).getReg();
-  int Offset = FirstMI.getOperand(2).getImm();
-  int OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
+  int64_t Offset = FirstMI.getOperand(2).getImm();
+  int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
 
   MergeForward = false;
 
@@ -226,10 +251,9 @@ RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
     if (MI.getOpcode() == FirstMI.getOpcode() &&
         TII->isLdStSafeToPair(MI, TRI)) {
       Register MIBaseReg = MI.getOperand(1).getReg();
-      int MIOffset = MI.getOperand(2).getImm();
+      int64_t MIOffset = MI.getOperand(2).getImm();
 
       if (BaseReg == MIBaseReg) {
-
         if ((Offset != MIOffset + OffsetStride) &&
             (Offset + OffsetStride != MIOffset)) {
           LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
@@ -349,12 +373,21 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
     First = InsertionPoint;
   }
 
-  if (!tryConvertToLdStPair(First, Second))
+  // Pair the instructions when possible, or create a bundle. Bundling keeps
+  // the instructions together, preserving their proximity and the intent of
+  // keeping related memory accesses together. This helps subsequent passes
+  // maintain any implicit ordering or avoid reordering that might violate
+  // memory semantics. For example, the MIPS P8700 benefits from it.
+  if (EnableLoadStorePairs && tryConvertToLdStPair(First, Second)) {
+    LLVM_DEBUG(dbgs() << "Pairing load/store:\n    ");
+    LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+  } else if (EnableLoadStoreBonding) {
     finalizeBundle(MBB, First.getInstrIterator(),
                    std::next(Second).getInstrIterator());
+    LLVM_DEBUG(dbgs() << "Bonding load/store:\n    ");
+    LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+  }
 
-  LLVM_DEBUG(dbgs() << "Bonding pair load/store:\n    ");
-  LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
   return NextI;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index d7439c50be076..448dc180178ae 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -62,10 +62,15 @@ static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
     "riscv-min-jump-table-entries", cl::Hidden,
     cl::desc("Set minimum number of entries to use a jump table on RISCV"));
 
+static cl::opt<bool> UseMIPSLoadStorePairsOpt(
+    "mips-riscv-load-store-pairs",
+    cl::desc("RISCV: Enable the load/store pair optimization pass"),
+    cl::init(false), cl::Hidden);
+
 static cl::opt<bool>
-    UseMIPSLoadStorePairsOpt("mips-riscv-load-store-pairs",
+    UseMIPSLoadStoreBondingOpt("mips-riscv-load-store-bonding",
                              cl::desc("RISCV: Optimize for load-store bonding"),
-                             cl::init(false), cl::Hidden);
+                             cl::init(true), cl::Hidden);
 
 static cl::opt<bool>
     UseCCMovInsn("riscv-ccmov", cl::desc("RISCV: Use 'mips.ccmov' instruction"),
@@ -252,6 +257,10 @@ bool RISCVSubtarget::useLoadStorePairs() const {
   return UseMIPSLoadStorePairsOpt && HasVendorXMIPSLSP;
 }
 
+bool RISCVSubtarget::useMIPSLoadStoreBonding() const {
+  return UseMIPSLoadStoreBondingOpt && HasVendorXMIPSLSP;
+}
+
 bool RISCVSubtarget::useCCMovInsn() const {
   return UseCCMovInsn && HasVendorXMIPSCMove;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index a059ed9202e33..38cbd184fcc3e 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -189,6 +189,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
     return is64Bit() ? 64 : 32;
   }
   bool useLoadStorePairs() const;
+  bool useMIPSLoadStoreBonding() const;
   bool useCCMovInsn() const;
   unsigned getFLen() const {
     if (HasStdExtD)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index c98c7891b045d..220bd8a311ae9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -312,11 +312,12 @@ ScheduleDAGInstrs *
 RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
   ScheduleDAGMI *DAG = nullptr;
   const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-  bool EnableLoadStoreClusteringForLoadStorePairOpt =
-      !ST.getMacroFusions().empty() && ST.useLoadStorePairs();
+  bool EnableLoadStoreClusteringForLoadStoreOpt =
+      !ST.getMacroFusions().empty() &&
+      (ST.useLoadStorePairs() || ST.useMIPSLoadStoreBonding());
 
   if (EnablePostMISchedLoadStoreClustering ||
-      EnableLoadStoreClusteringForLoadStorePairOpt) {
+      EnableLoadStoreClusteringForLoadStoreOpt) {
     DAG = createGenericSchedPostRA(C);
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
@@ -574,7 +575,7 @@ void RISCVPassConfig::addPreEmitPass() {
 
   // LoadStoreOptimizer creates bundles for load-store bonding.
   addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
-    return MF.getSubtarget<RISCVSubtarget>().useLoadStorePairs();
+    return MF.getSubtarget<RISCVSubtarget>().useMIPSLoadStoreBonding();
   }));
 }
 
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index e903e04ed3ec2..54138128963aa 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -181,7 +181,7 @@
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V post-regalloc pseudo instruction expansion pass
 ; CHECK-NEXT:       Insert KCFI indirect call checks
-; CHECK-NEXT:       RISCV Load / Store Optimizer
+; CHECK-NEXT:       RISC-V Load / Store Optimizer
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       PostRA Machine Instruction Scheduler
diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll
index 4f415e789c14b..d0282004af5a8 100644
--- a/llvm/test/CodeGen/RISCV/load-store-pair.ll
+++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; testd and testf exercise load/store bonding only.
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32I
 ; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d -verify-machineinstrs < %s \


