[llvm] [llvm][RISCV] Implement Zilsd load/store pair optimization (PR #158640)

Brandon Wu via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 03:58:56 PDT 2025


================
@@ -0,0 +1,765 @@
+//===-- RISCVZilsdOptimizer.cpp - RISC-V Zilsd Load/Store Optimizer ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load/store optimizations for the
+// RISC-V Zilsd extension. It combines pairs of 32-bit load/store instructions
+// into single 64-bit LD/SD instructions when possible.
+//
+// The pass runs in two phases:
+// 1. Pre-allocation: Reschedules loads/stores to bring consecutive memory
+//    accesses closer together and forms LD/SD pairs with register hints.
+// 2. Post-allocation: Fixes invalid LD/SD instructions if register allocation
+//    didn't provide suitable consecutive registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-zilsd-opt"
+
+// Counters for the pairs formed before register allocation and for the pairs
+// split back into 32-bit accesses.
+STATISTIC(NumLDFormed, "Number of LD instructions formed");
+STATISTIC(NumSDFormed, "Number of SD instructions formed");
+STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
+STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
+
+// Hidden command-line flag (off by default); when set, the Zilsd load/store
+// optimization is skipped.
+static cl::opt<bool> DisableZilsdOpt(
+    "disable-riscv-zilsd-opt",
+    cl::desc("Disable Zilsd load/store optimization"), cl::init(false),
+    cl::Hidden);
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation Zilsd optimization pass
+//===----------------------------------------------------------------------===//
+/// Machine function pass that runs before register allocation. For every
+/// basic block it calls rescheduleLoadStoreInstrs() to look for LW/SW
+/// instructions that can be combined into a Zilsd paired access.
+///
+/// The pass requires alias analysis and the machine dominator tree, and it
+/// preserves the CFG.
+class RISCVPreAllocZilsdOpt : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVPreAllocZilsdOpt() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "RISC-V pre-allocation Zilsd load/store optimization";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MachineDominatorTreeWrapperPass>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  bool isMemoryOp(const MachineInstr &MI);
+  bool rescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+
+  /// Returns true if \p Op0 and \p Op1 are two LW or two SW instructions on
+  /// the same base register with offsets 4 bytes apart; fills in the pseudo
+  /// opcode, the two data registers (lower address first), the base register
+  /// and the lower offset.
+  bool canFormLdSdPair(MachineInstr *Op0, MachineInstr *Op1, unsigned &NewOpc,
+                       Register &FirstReg, Register &SecondReg,
+                       Register &BaseReg, int &Offset);
+
+  /// Tries to pair the same-base accesses collected in \p Ops, using
+  /// \p MI2LocMap to recover their program order.
+  bool rescheduleOps(MachineBasicBlock *MBB,
+                     SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
+                     bool isLoad,
+                     DenseMap<MachineInstr *, unsigned> &MI2LocMap);
+
+  /// Returns true if no instruction between \p MI and \p Target prevents
+  /// moving \p MI next to \p Target.
+  bool isSafeToMove(MachineInstr *MI, MachineInstr *Target, bool MoveForward);
+
+  /// Returns the immediate offset of an LW/SW, or 0 if there is none.
+  int getMemoryOpOffset(const MachineInstr &MI);
+
+  // Per-function state, (re)assigned at the start of runOnMachineFunction().
+  // Default to null so that a use before assignment fails deterministically
+  // instead of reading an indeterminate pointer.
+  const RISCVSubtarget *STI = nullptr;
+  const RISCVInstrInfo *TII = nullptr;
+  const RISCVRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  AliasAnalysis *AA = nullptr;
+  MachineDominatorTree *DT = nullptr;
+};
+
+//===----------------------------------------------------------------------===//
+// Post-allocation Zilsd optimization pass
+//===----------------------------------------------------------------------===//
+/// Machine function pass that runs after register allocation. Per the file
+/// header, it fixes LD/SD instructions whose registers did not end up as a
+/// suitable consecutive pair.
+///
+/// The pass requires no analyses and preserves the CFG.
+class RISCVPostAllocZilsdOpt : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVPostAllocZilsdOpt() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "RISC-V post-allocation Zilsd load/store optimization";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  // NOTE(review): the definitions of these helpers are outside this excerpt;
+  // their names suggest detecting a non-consecutive register pair and
+  // splitting the paired access into two 32-bit ones -- confirm against the
+  // implementation.
+  bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator &MBBI);
+  bool isConsecutiveRegPair(Register First, Register Second);
+  void splitLdSdIntoTwo(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator &MBBI, bool isLoad);
+
+  // Cached per-function state. Default to null so that a use before
+  // assignment fails deterministically instead of reading an indeterminate
+  // pointer.
+  const RISCVSubtarget *STI = nullptr;
+  const RISCVInstrInfo *TII = nullptr;
+  const RISCVRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+};
+
+} // end anonymous namespace
+
+char RISCVPreAllocZilsdOpt::ID = 0; // Definition of the static pass ID member
+char RISCVPostAllocZilsdOpt::ID = 0; // Definition of the static pass ID member
+
+INITIALIZE_PASS_BEGIN(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+                      "RISC-V pre-allocation Zilsd optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) // Required in getAnalysisUsage
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) // Likewise required
+INITIALIZE_PASS_END(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+                    "RISC-V pre-allocation Zilsd optimization", false, false)
+
+INITIALIZE_PASS(RISCVPostAllocZilsdOpt, "riscv-postra-zilsd-opt",
+                "RISC-V post-allocation Zilsd optimization", false, false) // No required analyses
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation pass implementation
+//===----------------------------------------------------------------------===//
+
+/// Entry point of the pre-allocation pass: caches the per-function state and
+/// runs rescheduleLoadStoreInstrs() on every basic block.
+///
+/// \returns true if any basic block was modified.
+bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
+  // Honour the command-line switch first, then the generic skip logic.
+  if (DisableZilsdOpt)
+    return false;
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  STI = &MF.getSubtarget<RISCVSubtarget>();
+
+  // The optimization applies only to RV32 targets that implement Zilsd.
+  const bool IsEligible = !STI->is64Bit() && STI->hasStdExtZilsd();
+  if (!IsEligible)
+    return false;
+
+  TII = STI->getInstrInfo();
+  TRI = STI->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+
+  bool Changed = false;
+  for (MachineBasicBlock &Block : MF)
+    Changed |= rescheduleLoadStoreInstrs(&Block);
+
+  return Changed;
+}
+
+/// Return the immediate offset of an LW/SW instruction.
+///
+/// \returns the value of operand 2 when \p MI is an LW or SW and that operand
+/// is an immediate; 0 for every other opcode or operand kind.
+int RISCVPreAllocZilsdOpt::getMemoryOpOffset(const MachineInstr &MI) {
+  const unsigned Opc = MI.getOpcode();
+  if (Opc != RISCV::LW && Opc != RISCV::SW)
+    return 0;
+
+  // For LW/SW, the offset lives in operand 2.
+  const MachineOperand &OffsetOp = MI.getOperand(2);
+  if (!OffsetOp.isImm())
+    return 0;
+  return OffsetOp.getImm();
+}
+
+/// Check whether \p Op0 and \p Op1 can be merged into one paired access.
+///
+/// Both instructions must be LW or both SW, each must carry exactly one
+/// memory operand, they must use the same base register, and their immediate
+/// offsets must be exactly 4 bytes apart.
+///
+/// \param [out] NewOpc    PseudoLD_RV32_OPT for loads, PseudoSD_RV32_OPT for
+///                        stores.
+/// \param [out] FirstReg  Data register of the access at the lower offset.
+/// \param [out] SecondReg Data register of the access at the higher offset.
+/// \param [out] BaseReg   The shared base register.
+/// \param [out] Offset    The lower of the two offsets.
+/// \returns true if the pair can be formed. The output parameters may have
+/// been partially written when false is returned and must then be ignored.
+bool RISCVPreAllocZilsdOpt::canFormLdSdPair(MachineInstr *Op0,
+                                            MachineInstr *Op1, unsigned &NewOpc,
+                                            Register &FirstReg,
+                                            Register &SecondReg,
+                                            Register &BaseReg, int &Offset) {
+
+  unsigned Opcode = Op0->getOpcode();
+
+  // Check if we have two LW or two SW instructions
+  if (Opcode != Op1->getOpcode())
+    return false;
+
+  if (Opcode == RISCV::LW) {
+    NewOpc = RISCV::PseudoLD_RV32_OPT;
+  } else if (Opcode == RISCV::SW) {
+    NewOpc = RISCV::PseudoSD_RV32_OPT;
+  } else {
+    return false;
+  }
+
+  if (!Op0->hasOneMemOperand() || !Op1->hasOneMemOperand())
+    return false;
+
+  // The code below reads operand 1 as a register and operand 2 as an
+  // immediate. Reject any other operand kind up front: getReg() is only valid
+  // on register operands, and getMemoryOpOffset() reports a non-immediate
+  // offset as 0, which could otherwise be mistaken for a real offset that is
+  // 4 bytes away from its neighbour.
+  const MachineOperand &BaseOp0 = Op0->getOperand(1);
+  const MachineOperand &BaseOp1 = Op1->getOperand(1);
+  if (!BaseOp0.isReg() || !BaseOp1.isReg())
+    return false;
+  if (!Op0->getOperand(2).isImm() || !Op1->getOperand(2).isImm())
+    return false;
+
+  // Get offsets and check they are consecutive
+  int Offset0 = getMemoryOpOffset(*Op0);
+  int Offset1 = getMemoryOpOffset(*Op1);
+
+  // Offsets must be 4 bytes apart
+  // NOTE(review): no alignment check on the memory operands is visible in
+  // this function -- confirm it is enforced elsewhere if the target needs it.
+  if (std::abs(Offset1 - Offset0) != 4)
+    return false;
+
+  // Make sure we have the same base register
+  Register Base0 = BaseOp0.getReg();
+  Register Base1 = BaseOp1.getReg();
+  if (Base0 != Base1)
+    return false;
+
+  // Set output parameters; the access at the lower offset comes first
+  if (Offset0 < Offset1) {
+    FirstReg = Op0->getOperand(0).getReg();
+    SecondReg = Op1->getOperand(0).getReg();
+    Offset = Offset0;
+  } else {
+    FirstReg = Op1->getOperand(0).getReg();
+    SecondReg = Op0->getOperand(0).getReg();
+    Offset = Offset1;
+  }
+
+  BaseReg = Base0;
+
+  // Check that the two data registers are different
+  if (FirstReg == SecondReg)
+    return false;
+
+  // For loads, check that neither destination register is the same as the
+  // base register. This prevents register reuse issues where the first load
+  // overwrites the base.
+  if (Opcode == RISCV::LW) {
+    if (FirstReg == BaseReg || SecondReg == BaseReg)
+      return false;
+  }
+
+  return true;
+}
+
+/// Decide whether \p MI can be moved next to \p Target.
+///
+/// Scans the instructions strictly between the two and rejects the move if
+/// any of them is a call or terminator, has unmodeled side effects, modifies
+/// the base register of \p MI, touches the data register of \p MI in a
+/// conflicting way, or may alias the memory accessed by \p MI.
+///
+/// \param MI          The load/store to move; operand 0 is read as its data
+///                    register and operand 1 as its base register.
+/// \param Target      The instruction \p MI would be moved next to.
+/// \param MoveForward True if \p MI precedes \p Target (the scan starts after
+///                    \p MI); false if \p Target precedes \p MI (the scan
+///                    starts after \p Target).
+/// \returns true if nothing in between blocks the move.
+bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
+                                         bool MoveForward) {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::iterator Start = MI->getIterator();
+  MachineBasicBlock::iterator End = Target->getIterator();
+
+  // The scan always walks in program order, so begin at whichever of the two
+  // instructions comes first.
+  if (!MoveForward) {
+    std::swap(Start, End);
+  }
+
+  // Increment Start to skip the current instruction
+  if (Start != MBB->end())
+    ++Start;
+
+  Register DefReg = MI->getOperand(0).getReg();
+  Register BaseReg = MI->getOperand(1).getReg();
+
+  const MachineBasicBlock::iterator BlockEnd = MBB->end();
+  for (auto It = Start; It != End; ++It) {
+    // End was not reached before the block ran out (wrong direction, or the
+    // two instructions are not in the same block). Be conservative rather
+    // than walking past the end of the block.
+    if (It == BlockEnd)
+      return false;
+
+    // Don't move across calls or terminators
+    if (It->isCall() || It->isTerminator()) {
+      LLVM_DEBUG(dbgs() << "Cannot move across call/terminator: " << *It);
+      return false;
+    }
+
+    // Don't move across instructions with unmodeled side effects
+    if (It->hasUnmodeledSideEffects()) {
+      LLVM_DEBUG(dbgs() << "Cannot move across instruction with side effects: "
+                        << *It);
+      return false;
+    }
+
+    // Check if the base register is modified
+    if (It->modifiesRegister(BaseReg, TRI)) {
+      LLVM_DEBUG(dbgs() << "Base register " << BaseReg
+                        << " modified by: " << *It);
+      return false;
+    }
+
+    // For loads, check if the loaded value is used or redefined
+    if (MI->mayLoad() &&
+        (It->readsRegister(DefReg, TRI) || It->modifiesRegister(DefReg, TRI))) {
+      LLVM_DEBUG(dbgs() << "Destination register " << DefReg
+                        << " used by: " << *It);
+      return false;
+    }
+
+    // For stores, check if the stored register is modified
+    if (MI->mayStore() && It->modifiesRegister(DefReg, TRI)) {
+      LLVM_DEBUG(dbgs() << "Source register " << DefReg
+                        << " modified by: " << *It);
+      return false;
+    }
+
+    // Check for memory operation interference
+    if (MI->mayLoadOrStore() && It->mayLoadOrStore() &&
+        It->mayAlias(AA, *MI, /*UseTBAA*/ false)) {
+      LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleOps(
+    MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
+    bool isLoad, DenseMap<MachineInstr *, unsigned> &MI2LocMap) {
+
+  if (Ops.size() < 2)
+    return false;
+
+  // Sort by offset
+  std::sort(Ops.begin(), Ops.end(), [this](MachineInstr *A, MachineInstr *B) {
+    return getMemoryOpOffset(*A) < getMemoryOpOffset(*B);
+  });
+
+  bool Modified = false;
+
+  // Try to pair consecutive operations
+  for (size_t i = 0; i + 1 < Ops.size(); i++) {
+    MachineInstr *Op0 = Ops[i];
+    MachineInstr *Op1 = Ops[i + 1];
+
+    // Skip if either instruction was already processed
+    if (!Op0->getParent() || !Op1->getParent())
+      continue;
+
+    unsigned NewOpc;
+    Register FirstReg, SecondReg, BaseReg;
+    int Offset;
+
+    if (!canFormLdSdPair(Op0, Op1, NewOpc, FirstReg, SecondReg, BaseReg,
+                         Offset))
+      continue;
+
+    // Check if we can safely and profitably move the instructions together
+    SmallPtrSet<MachineInstr *, 4> MemOps;
+    SmallSet<unsigned, 4> MemRegs;
+    MemOps.insert(Op0);
+    MemRegs.insert(Op0->getOperand(0).getReg().id());
+
+    // Use MI2LocMap to determine which instruction appears later in program
+    // order
+    bool Op1IsLater = MI2LocMap[Op1] > MI2LocMap[Op0];
+
+    // For loads: move later instruction up (backwards) to earlier instruction
+    // For stores: move earlier instruction down (forwards) to later instruction
+    MachineInstr *MoveInstr, *TargetInstr;
+    if (isLoad) {
+      // For loads: move the later instruction to the earlier one
+      MoveInstr = Op1IsLater ? Op1 : Op0;
+      TargetInstr = Op1IsLater ? Op0 : Op1;
+    } else {
+      // For stores: move the earlier instruction to the later one
+      MoveInstr = Op1IsLater ? Op0 : Op1;
+      TargetInstr = Op1IsLater ? Op1 : Op0;
+    }
+
+    unsigned Distance = Op1IsLater ? MI2LocMap[Op1] - MI2LocMap[Op0]
+                                   : MI2LocMap[Op0] - MI2LocMap[Op1];
+    // FIXME: Decide what's maximum distance
+    if (!isSafeToMove(MoveInstr, TargetInstr, !isLoad) || Distance > 10)
----------------
4vtomat wrote:

@djtodoro I think it's always positive, since we always subtract the earlier position from the later one.

https://github.com/llvm/llvm-project/pull/158640


More information about the llvm-commits mailing list