[llvm] [RISCV] Generate MIPS load/store pair instructions (PR #124717)

Djordje Todorovic via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 25 03:52:30 PST 2025


================
@@ -0,0 +1,397 @@
+//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Load/Store Pairing: It identifies pairs of load or store instructions
+// operating on consecutive memory locations and merges them into a single
+// paired instruction, leveraging hardware support for paired memory accesses.
+// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
+//
+// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
+// merging zero store instructions, promoting loads that read directly from a
+// preceding store, and merging base register updates with load/store
+// instructions (via pre-/post-indexed addressing). These advanced
+// transformations are not yet implemented in the RISC-V pass but represent
+// potential future enhancements for further optimizing RISC-V memory
+// operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-load-store-opt"
+#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
+
+// LdStLimit bounds how many instructions are scanned when searching for a
+// load/store that can be paired with the current one (128 unless overridden).
+static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
+                                   cl::Hidden);
+
+namespace {
+
+// Machine-function pass that merges pairable loads/stores into single paired
+// instructions.
+struct RISCVLoadStoreOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
+
+  // The pass declares NoVRegs as a required machine-function property.
+  MachineFunctionProperties getRequiredProperties() const override {
+    MachineFunctionProperties Props;
+    Props.set(MachineFunctionProperties::Property::NoVRegs);
+    return Props;
+  }
+
+  // Alias-analysis results are required on top of whatever the base class
+  // requests.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // Pass entry point; returns true when the function was modified.
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  // Try to pair the load/store at MBBI with a matching instruction. MBBI is
+  // taken by reference and is repositioned when a pair is formed.
+  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
+  // Turn the two given loads/stores into one paired instruction.
+  bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
+                            MachineBasicBlock::iterator Second);
+
+  // Starting from I, look for a load/store that can be combined with it into
+  // a load/store pair. Yields the matching instruction, or MBB->end() when
+  // there is none. MergeForward is an output flag that is handed on to
+  // mergePairedInsns.
+  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
+                                               bool &MergeForward);
+
+  // Merge I and Paired. The returned iterator is where tryToPairLdStInst
+  // resumes scanning.
+  // NOTE(review): definition is outside this view -- confirm exact contract.
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, bool MergeForward);
+
+private:
+  // Per-function state, (re)initialized at the top of runOnMachineFunction.
+  AliasAnalysis *AA;
+  MachineRegisterInfo *MRI;
+  const RISCVInstrInfo *TII;
+  const RISCVRegisterInfo *TRI;
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+};
+} // end anonymous namespace
+
+// Storage for the static pass ID declared in RISCVLoadStoreOpt, followed by
+// registration of the pass under DEBUG_TYPE with RISCV_LOAD_STORE_OPT_NAME as
+// its display name.
+// NOTE(review): the two trailing `false` arguments are presumably
+// INITIALIZE_PASS's "CFG-only" and "is-analysis" flags -- confirm.
+char RISCVLoadStoreOpt::ID = 0;
+INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
+                false)
+
+// Pass entry point: walk every basic block of Fn and try to pair each
+// pairable load/store. Returns true if at least one pair was formed.
+bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  // Nothing to do unless the subtarget asks for load/store pairs.
+  const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
+  if (!Subtarget.useLoadStorePairs())
+    return false;
+
+  // Cache the per-function state used by the helper methods.
+  TII = Subtarget.getInstrInfo();
+  TRI = Subtarget.getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : Fn) {
+    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+    MachineBasicBlock::iterator Cur = MBB.begin();
+    MachineBasicBlock::iterator End = MBB.end();
+    while (Cur != End) {
+      // On success tryToPairLdStInst has already repositioned Cur, so it
+      // must only be advanced here when no pair was formed.
+      bool Paired = TII->isPairableLdStInstOpc(Cur->getOpcode()) &&
+                    tryToPairLdStInst(Cur);
+      if (Paired)
+        Changed = true;
+      else
+        ++Cur;
+    }
+  }
+  return Changed;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+//
+// MBBI points at the candidate instruction. If a matching instruction is found
+// the two are merged, MBBI is set to the iterator returned by
+// mergePairedInsns, and true is returned. Otherwise MBBI is left unchanged and
+// false is returned, so the caller must advance it.
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+
+  // An instruction with an ordered memory reference is not a candidate. This
+  // covers volatile accesses as well as ordered atomics and instructions whose
+  // memory operands are unknown.
+  if (MI.hasOrderedMemoryRef())
+    return false;
+
+  if (!TII->isLdStSafeToPair(MI, TRI))
+    return false;
+
+  // Look ahead for a pairable instruction.
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  // Initialized so that a defined value is always handed to mergePairedInsns,
+  // whether or not findMatchingInsn writes the flag on every path.
+  bool MergeForward = false;
+  MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
+  if (Paired == E)
+    return false;
+
+  MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+  return true;
+}
+
+// Merge two adjacent load/store instructions into a paired instruction
+// (LDP/SDP/SWP/LWP) if the effective address is 16-byte aligned. This function
+// selects the appropriate paired opcode, verifies that the memory operand (or
+// fixed-stack slot) is 16-byte aligned, and checks that the offset is valid. If
----------------
djtodoro wrote:

Well, for this CPU:

```
Description: An LDP instruction is guaranteed to atomically read 16-byte data if its address is 16-byte aligned. If
the address is 8-byte aligned but not 16-byte aligned, then the data is only guaranteed to be 8-byte atomic. For any
other alignment, there is no atomicity guarantee.
A Load Address Misaligned exception may occur if the virtual address targeted by the LDP instruction is not
aligned to a 16-byte boundary. Performance optimized implementations will provide native hardware support for
8-byte aligned cases, including cases that cross a page boundary. When $rd1 and $rd2 are the same register, the
value written to the output register is unknown.
```

https://github.com/llvm/llvm-project/pull/124717


More information about the llvm-commits mailing list