[llvm] 645e0dc - [llvm][RISCV] Implement Zilsd load/store pair optimization (#158640)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 21:04:10 PST 2025
Author: Brandon Wu
Date: 2025-11-21T05:04:05Z
New Revision: 645e0dcbff33248bd2cdc4ac258420de9b5936b1
URL: https://github.com/llvm/llvm-project/commit/645e0dcbff33248bd2cdc4ac258420de9b5936b1
DIFF: https://github.com/llvm/llvm-project/commit/645e0dcbff33248bd2cdc4ac258420de9b5936b1.diff
LOG: [llvm][RISCV] Implement Zilsd load/store pair optimization (#158640)
This commit implements a complete load/store optimization pass for the
RISC-V Zilsd extension, which combines pairs of 32-bit load/store
instructions into single 64-bit LD/SD instructions when possible.
The default required alignment is 8 bytes; the zilsd-4byte-align feature
relaxes this requirement to 4 bytes.
Related work: https://reviews.llvm.org/D144002
---------
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
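As a quick illustration of the pairing condition described above, here is a
minimal standalone sketch (hypothetical names, not the pass's actual API):
two 32-bit accesses qualify when their offsets are exactly 4 bytes apart and
the pair meets the required alignment (8 bytes by default, 4 with
zilsd-4byte-align).

    #include <cstdint>

    // Sketch: eligibility of two word accesses for LD/SD pairing, assuming
    // the offsets and the pair's alignment were already extracted.
    bool canPairOffsets(int64_t Offset0, int64_t Offset1, uint64_t PairAlign,
                        bool Allow4ByteAlign) {
      uint64_t RequiredAlign = Allow4ByteAlign ? 4 : 8;
      // Accesses must be adjacent words and sufficiently aligned.
      return (Offset1 - Offset0 == 4) && (PairAlign >= RequiredAlign);
    }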
Added:
llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir
llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir
Modified:
llvm/lib/Target/RISCV/CMakeLists.txt
llvm/lib/Target/RISCV/RISCV.h
llvm/lib/Target/RISCV/RISCVFeatures.td
llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
llvm/lib/Target/RISCV/RISCVRegisterInfo.h
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/RISCV/features-info.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index e9088a4d9275c..f8cf71ea077d6 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -72,6 +72,7 @@ add_llvm_target(RISCVCodeGen
RISCVVLOptimizer.cpp
RISCVVMV0Elimination.cpp
RISCVZacasABIFix.cpp
+ RISCVZilsdOptimizer.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 938c62e4f9f94..048db205e2289 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -102,6 +102,9 @@ void initializeRISCVPushPopOptPass(PassRegistry &);
FunctionPass *createRISCVLoadStoreOptPass();
void initializeRISCVLoadStoreOptPass(PassRegistry &);
+FunctionPass *createRISCVPreAllocZilsdOptPass();
+void initializeRISCVPreAllocZilsdOptPass(PassRegistry &);
+
FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);
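These hooks are wired up in RISCVTargetMachine.cpp later in this patch; for
reference, the relevant excerpts are:

    // In LLVMInitializeRISCVTarget():
    initializeRISCVPreAllocZilsdOptPass(*PR);

    // In RISCVPassConfig::addPreRegAlloc(), at -O1 and above:
    addPass(createRISCVPreAllocZilsdOptPass());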
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 0b964c4808d8a..bf1caafc2f9ba 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -194,6 +194,10 @@ def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">,
AssemblerPredicate<(all_of FeatureStdExtZilsd),
"'Zilsd' (Load/Store pair instructions)">;
+def FeatureZilsd4ByteAlign
+ : SubtargetFeature<"zilsd-4byte-align", "AllowZilsd4ByteAlign", "true",
+ "Allow 4-byte alignment for Zilsd LD/SD instructions">;
+
// Multiply Extensions
def FeatureStdExtZmmul
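For how this feature interacts with the others, the pre-RA pass below
(RISCVZilsdOptimizer.cpp) computes its required alignment from the subtarget;
a standalone sketch of that selection (hypothetical helper name):

    #include <cstdint>

    // Unaligned scalar memory accepts any alignment; otherwise
    // zilsd-4byte-align lowers the requirement from 8 bytes to 4.
    uint64_t pickRequiredAlign(bool UnalignedScalarMem, bool Zilsd4ByteAlign) {
      if (UnalignedScalarMem)
        return 1;
      return Zilsd4ByteAlign ? 4 : 8;
    }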
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
index a3203f288b545..4fc859f2547c1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
@@ -47,6 +47,23 @@ let Predicates = [HasStdExtZilsd, IsRV32] in {
def PseudoLD_RV32 : PseudoLoad<"ld", GPRPairRV32>;
def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>;
+// Pseudo instructions for load/store optimization with 2 separate registers
+def PseudoLD_RV32_OPT :
+ Pseudo<(outs GPR:$rd1, GPR:$rd2),
+ (ins GPR:$rs1, simm12_lo:$imm12), [], "", ""> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+def PseudoSD_RV32_OPT :
+ Pseudo<(outs),
+ (ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", ""> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 1;
+}
+
def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"sd $rs2, (${rs1})", (SD_RV32 GPRPairRV32:$rs2, GPR:$rs1, 0), 0>;
}
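For reference, the pre-RA pass added below materializes these pseudos with
two independent GPR operands rather than a GPRPair, deferring the even/odd
pairing to register allocation; excerpt from RISCVZilsdOptimizer.cpp further
down in this patch:

    MIB = BuildMI(*MBB, InsertPos, DL, TII->get(RISCV::PseudoLD_RV32_OPT))
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg)
              .add(OffsetOp);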
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index 115a96e01f6c9..a22ab6bfc04b8 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -11,6 +11,9 @@
// paired instruction, leveraging hardware support for paired memory accesses.
// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
//
+// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
+// register allocation didn't provide suitable consecutive registers.
+//
// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
// merging zero store instructions, promoting loads that read directly from a
// preceding store, and merging base register updates with load/store
@@ -23,6 +26,7 @@
#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
@@ -38,6 +42,8 @@ using namespace llvm;
// pairs.
static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
cl::Hidden);
+STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
+STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
namespace {
@@ -75,6 +81,13 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool MergeForward);
+ // Post reg-alloc zilsd part
+ bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool isValidZilsdRegPair(Register First, Register Second);
+ void splitLdSdIntoTwo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, bool IsLoad);
+
private:
AliasAnalysis *AA;
MachineRegisterInfo *MRI;
@@ -92,8 +105,6 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
- if (!Subtarget.useMIPSLoadStorePairs())
- return false;
bool MadeChange = false;
TII = Subtarget.getInstrInfo();
@@ -103,18 +114,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
- for (MachineBasicBlock &MBB : Fn) {
- LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+ if (Subtarget.useMIPSLoadStorePairs()) {
+ for (MachineBasicBlock &MBB : Fn) {
+ LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+ tryToPairLdStInst(MBBI))
+ MadeChange = true;
+ else
+ ++MBBI;
+ }
+ }
+ }
- for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- MBBI != E;) {
- if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
- tryToPairLdStInst(MBBI))
- MadeChange = true;
- else
- ++MBBI;
+ if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) {
+ for (auto &MBB : Fn) {
+ for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
+ if (fixInvalidRegPairOp(MBB, MBBI)) {
+ MadeChange = true;
+ // Iterator was updated by fixInvalidRegPairOp
+ } else {
+ ++MBBI;
+ }
+ }
}
}
+
return MadeChange;
}
@@ -395,6 +422,187 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
return NextI;
}
+//===----------------------------------------------------------------------===//
+// Post reg-alloc zilsd pass implementation
+//===----------------------------------------------------------------------===//
+
+bool RISCVLoadStoreOpt::isValidZilsdRegPair(Register First, Register Second) {
+ // Special case: the first register cannot be x0 unless both registers are
+ // x0.
+ // The spec says: LD instructions with destination x0 are processed as any
+ // other load, but the result is discarded entirely and x1 is not written.
+ // If x0 is used as the source of an SD, the entire 64-bit operand is zero,
+ // i.e. register x1 is not accessed.
+ if (First == RISCV::X0)
+ return Second == RISCV::X0;
+
+ // Check if registers form a valid even/odd pair for Zilsd
+ unsigned FirstNum = TRI->getEncodingValue(First);
+ unsigned SecondNum = TRI->getEncodingValue(Second);
+
+ // Must be consecutive and first must be even
+ return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
+}
+
+void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ bool IsLoad) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc DL = MI->getDebugLoc();
+
+ const MachineOperand &FirstOp = MI->getOperand(0);
+ const MachineOperand &SecondOp = MI->getOperand(1);
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ Register FirstReg = FirstOp.getReg();
+ Register SecondReg = SecondOp.getReg();
+ Register BaseReg = BaseOp.getReg();
+
+ // Handle both immediate and symbolic operands for offset
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+ int BaseOffset;
+ if (OffsetOp.isImm())
+ BaseOffset = OffsetOp.getImm();
+ else
+ // For symbolic operands, extract the embedded offset
+ BaseOffset = OffsetOp.getOffset();
+
+ unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
+ MachineInstrBuilder MIB1, MIB2;
+
+ // Create two separate instructions
+ if (IsLoad) {
+ // It's possible that the first register is the same as the base register.
+ // When we split, the result would be incorrect because the base register
+ // is overwritten by the first load, e.g.
+ // X10, X13 = PseudoLD_RV32_OPT killed X10, 0
+ // =>
+ // X10 = LW X10, 0
+ // X13 = LW killed X10, 4
+ // we can just switch the order to resolve that:
+ // X13 = LW X10, 4
+ // X10 = LW killed X10, 0
+ if (FirstReg == BaseReg) {
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(SecondReg,
+ RegState::Define | getDeadRegState(SecondOp.isDead()))
+ .addReg(BaseReg);
+ MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(FirstReg,
+ RegState::Define | getDeadRegState(FirstOp.isDead()))
+ .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
+
+ } else {
+ MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(FirstReg,
+ RegState::Define | getDeadRegState(FirstOp.isDead()))
+ .addReg(BaseReg);
+
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(SecondReg,
+ RegState::Define | getDeadRegState(SecondOp.isDead()))
+ .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
+ }
+
+ ++NumLD2LW;
+ LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
+ } else {
+ assert(
+ FirstReg != SecondReg &&
+ "First register and second register is impossible to be same register");
+ MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(FirstReg, getKillRegState(FirstOp.isKill()))
+ .addReg(BaseReg);
+
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(SecondReg, getKillRegState(SecondOp.isKill()))
+ .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
+
+ ++NumSD2SW;
+ LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
+ }
+
+ // Add offset operands - preserve symbolic references
+ MIB1.add(OffsetOp);
+ if (OffsetOp.isImm())
+ MIB2.addImm(BaseOffset + 4);
+ else if (OffsetOp.isGlobal())
+ MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
+ OffsetOp.getTargetFlags());
+ else if (OffsetOp.isCPI())
+ MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
+ OffsetOp.getTargetFlags());
+ else if (OffsetOp.isBlockAddress())
+ MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
+ OffsetOp.getTargetFlags());
+
+ // Copy memory operands if the original instruction had them
+ // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
+ // not 8.
+ MIB1.cloneMemRefs(*MI);
+ MIB2.cloneMemRefs(*MI);
+
+ // Remove the original paired instruction and update iterator
+ MBBI = MBB.erase(MBBI);
+}
+
+bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+
+ // Check if this is a Zilsd pseudo that needs fixing
+ if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
+ return false;
+
+ bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;
+
+ const MachineOperand &FirstOp = MI->getOperand(0);
+ const MachineOperand &SecondOp = MI->getOperand(1);
+ Register FirstReg = FirstOp.getReg();
+ Register SecondReg = SecondOp.getReg();
+
+ if (!isValidZilsdRegPair(FirstReg, SecondReg)) {
+ // Need to split back into two instructions
+ splitLdSdIntoTwo(MBB, MBBI, IsLoad);
+ return true;
+ }
+
+ // Registers are valid, convert to real LD/SD instruction
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ Register BaseReg = BaseOp.getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ // Handle both immediate and symbolic operands for offset
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+
+ unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;
+
+ // Create register pair from the two individual registers
+ unsigned RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
+ &RISCV::GPRPairRegClass);
+ // Create the real LD/SD instruction with register pair
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));
+
+ if (IsLoad) {
+ // For LD, the register pair is the destination
+ MIB.addReg(RegPair, RegState::Define | getDeadRegState(FirstOp.isDead() &&
+ SecondOp.isDead()));
+ } else {
+ // For SD, the register pair is the source
+ MIB.addReg(RegPair, getKillRegState(FirstOp.isKill() && SecondOp.isKill()));
+ }
+
+ MIB.addReg(BaseReg, getKillRegState(BaseOp.isKill()))
+ .add(OffsetOp)
+ .cloneMemRefs(*MI);
+
+ LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
+ << "\n");
+
+ // Remove the pseudo instruction and update iterator
+ MBBI = MBB.erase(MBBI);
+
+ return true;
+}
+
// Returns an instance of the Load / Store Optimization pass.
FunctionPass *llvm::createRISCVLoadStoreOptPass() {
return new RISCVLoadStoreOpt();
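A standalone restatement of isValidZilsdRegPair in terms of register
encodings, with the cases exercised by the MIR tests below (hypothetical
helper, for illustration only):

    #include <cassert>

    // x0 (encoding 0) is even and would pass the generic check with x1,
    // which is why the special case comes first.
    bool isValidPairEnc(unsigned FirstEnc, unsigned SecondEnc) {
      if (FirstEnc == 0)
        return SecondEnc == 0;
      return FirstEnc % 2 == 0 && SecondEnc == FirstEnc + 1;
    }

    int main() {
      assert(isValidPairEnc(12, 13));  // x12/x13: converted to LD_RV32/SD_RV32
      assert(!isValidPairEnc(11, 13)); // x11/x13: split back to two LW/SW
      assert(isValidPairEnc(0, 0));    // x0/x0: valid zero-store pair
      assert(!isValidPairEnc(0, 1));   // x0/x1: split back
    }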
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 84bb29433fb3b..263d6a1fc2220 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -864,6 +864,46 @@ bool RISCVRegisterInfo::getRegAllocationHints(
const MachineRegisterInfo *MRI = &MF.getRegInfo();
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ // Handle RegPairEven/RegPairOdd hints for Zilsd register pairs
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg);
+ unsigned HintType = Hint.first;
+ Register Partner = Hint.second;
+
+ if (HintType == RISCVRI::RegPairEven || HintType == RISCVRI::RegPairOdd) {
+ // Check if we want the even or odd register of a consecutive pair
+ bool WantOdd = (HintType == RISCVRI::RegPairOdd);
+
+ // First priority: Check if partner is already allocated
+ if (Partner.isVirtual() && VRM && VRM->hasPhys(Partner)) {
+ MCRegister PartnerPhys = VRM->getPhys(Partner);
+ // Calculate the exact register we need for consecutive pairing
+ MCRegister TargetReg = PartnerPhys.id() + (WantOdd ? 1 : -1);
+
+ // Verify it's valid and available
+ if (RISCV::GPRRegClass.contains(TargetReg) &&
+ is_contained(Order, TargetReg))
+ Hints.push_back(TargetReg.id());
+ }
+
+ // Second priority: Try to find consecutive register pairs in the allocation
+ // order
+ for (MCPhysReg PhysReg : Order) {
+ if (!PhysReg)
+ continue;
+
+ unsigned RegNum = getEncodingValue(PhysReg);
+ // Check if this register matches the even/odd requirement
+ bool IsOdd = (RegNum % 2 != 0);
+
+ // Verify the pair register exists and is in the same register class
+ // TODO: Skip unallocatable registers: neither register of the pair may be
+ // reserved, so if we need the odd register we must check that the
+ // corresponding even register is not reserved, and vice versa.
+ if ((WantOdd && IsOdd) || (!WantOdd && !IsOdd))
+ Hints.push_back(PhysReg);
+ }
+ }
+
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
@@ -1005,6 +1045,35 @@ bool RISCVRegisterInfo::getRegAllocationHints(
return BaseImplRetVal;
}
+void RISCVRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(Reg);
+
+ // Handle RegPairEven/RegPairOdd hints for Zilsd register pairs
+ if ((Hint.first == RISCVRI::RegPairOdd ||
+ Hint.first == RISCVRI::RegPairEven) &&
+ Hint.second.isVirtual()) {
+ // If 'Reg' is one of the even/odd register pair and it's now changed
+ // (e.g. coalesced) into a different register, the other register of the
+ // pair allocation hint must be updated to reflect the relationship change.
+ Register Partner = Hint.second;
+ std::pair<unsigned, Register> PartnerHint =
+ MRI->getRegAllocationHint(Partner);
+
+ // Make sure partner still points to us
+ if (PartnerHint.second == Reg) {
+ // Update partner to point to NewReg instead of Reg
+ MRI->setRegAllocationHint(Partner, PartnerHint.first, NewReg);
+
+ // If NewReg is virtual, set up the reciprocal hint
+ // NewReg takes over Reg's role, so it gets the SAME hint type as Reg
+ if (NewReg.isVirtual())
+ MRI->setRegAllocationHint(NewReg, Hint.first, Partner);
+ }
+ }
+}
+
Register
RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
uint16_t Encoding) const {
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 67726db504122..f29f85e4987f6 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -37,6 +37,13 @@ enum : uint8_t {
NFShiftMask = 0b111 << NFShift,
};
+/// Register allocation hints for Zilsd register pairs
+enum {
+ // Used for Zilsd LD/SD register pairs
+ RegPairOdd = 1,
+ RegPairEven = 2,
+};
+
/// \returns the IsVRegClass for the register class.
static inline bool isVRegClass(uint8_t TSFlags) {
return (TSFlags & IsVRegClassShiftMask) >> IsVRegClassShift;
@@ -143,6 +150,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
+ void updateRegAllocHint(Register Reg, Register NewReg,
+ MachineFunction &MF) const override;
+
Register findVRegWithEncoding(const TargetRegisterClass &RegClass,
uint16_t Encoding) const;
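The RegPairEven/RegPairOdd hints declared above are set reciprocally by the
pre-RA pass; excerpt from RISCVZilsdOptimizer.cpp below:

    // Each virtual register of the pair hints at the other, tagged with the
    // even/odd role it should receive from the allocator.
    MRI->setRegAllocationHint(FirstReg, RISCVRI::RegPairEven, SecondReg);
    MRI->setRegAllocationHint(SecondReg, RISCVRI::RegPairOdd, FirstReg);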
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 8f6c0af5de3b4..52dc38564059c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -141,6 +141,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVPushPopOptPass(*PR);
initializeRISCVIndirectBranchTrackingPass(*PR);
initializeRISCVLoadStoreOptPass(*PR);
+ initializeRISCVPreAllocZilsdOptPass(*PR);
initializeRISCVExpandAtomicPseudoPass(*PR);
initializeRISCVRedundantCopyEliminationPass(*PR);
initializeRISCVAsmPrinterPass(*PR);
@@ -604,6 +605,8 @@ void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVVLOptimizerPass());
+ // Add Zilsd pre-allocation load/store optimization
+ addPass(createRISCVPreAllocZilsdOptPass());
}
addPass(createRISCVInsertReadWriteCSRPass());
diff --git a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
new file mode 100644
index 0000000000000..99e83fbb05a73
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
@@ -0,0 +1,529 @@
+//===-- RISCVZilsdOptimizer.cpp - RISC-V Zilsd Load/Store Optimizer ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load/store optimizations for the
+// RISC-V Zilsd extension. It combines pairs of 32-bit load/store instructions
+// into single 64-bit LD/SD instructions when possible.
+//
+// The pass runs in two phases:
+// 1. Pre-allocation: Reschedules loads/stores to bring consecutive memory
+// accesses closer together and forms LD/SD pairs with register hints.
+// 2. Post-allocation: Fixes invalid LD/SD instructions if register allocation
+// didn't provide suitable consecutive registers.
+//
+// Note: the second phase is integrated into the RISCVLoadStoreOptimizer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-zilsd-opt"
+
+STATISTIC(NumLDFormed, "Number of LD instructions formed");
+STATISTIC(NumSDFormed, "Number of SD instructions formed");
+
+static cl::opt<bool>
+ DisableZilsdOpt("disable-riscv-zilsd-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable Zilsd load/store optimization"));
+
+static cl::opt<unsigned> MaxRescheduleDistance(
+ "riscv-zilsd-max-reschedule-distance", cl::Hidden, cl::init(10),
+ cl::desc("Maximum distance for rescheduling load/store instructions"));
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation Zilsd optimization pass
+//===----------------------------------------------------------------------===//
+class RISCVPreAllocZilsdOpt : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVPreAllocZilsdOpt() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "RISC-V pre-allocation Zilsd load/store optimization";
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setIsSSA();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ enum class MemoryOffsetKind {
+ Imm = 0,
+ Global = 1,
+ CPI = 2,
+ BlockAddr = 3,
+ Unknown = 4,
+ };
+ using MemOffset = std::pair<MemoryOffsetKind, int>;
+ using BaseRegInfo = std::pair<unsigned, MemoryOffsetKind>;
+
+private:
+ bool isMemoryOp(const MachineInstr &MI);
+ bool rescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ bool canFormLdSdPair(MachineInstr *MI0, MachineInstr *MI1);
+ bool rescheduleOps(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &MIs, BaseRegInfo Base,
+ bool IsLoad,
+ DenseMap<MachineInstr *, unsigned> &MI2LocMap);
+ bool isSafeToMove(MachineInstr *MI, MachineInstr *Target, bool MoveForward);
+ MemOffset getMemoryOpOffset(const MachineInstr &MI);
+
+ const RISCVSubtarget *STI;
+ const RISCVInstrInfo *TII;
+ const RISCVRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ Align RequiredAlign;
+};
+
+} // end anonymous namespace
+
+char RISCVPreAllocZilsdOpt::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+ "RISC-V pre-allocation Zilsd optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+ "RISC-V pre-allocation Zilsd optimization", false, false)
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation pass implementation
+//===----------------------------------------------------------------------===//
+
+bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
+
+ if (DisableZilsdOpt || skipFunction(MF.getFunction()))
+ return false;
+
+ STI = &MF.getSubtarget<RISCVSubtarget>();
+
+ // Only run on RV32 with Zilsd extension
+ if (STI->is64Bit() || !STI->hasStdExtZilsd())
+ return false;
+
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+
+ // Check alignment: the default is 8 bytes, but the zilsd-4byte-align
+ // feature allows 4 bytes. If unaligned scalar memory is enabled, allow any
+ // alignment.
+ RequiredAlign = STI->enableUnalignedScalarMem() ? Align(1)
+ : STI->allowZilsd4ByteAlign() ? Align(4)
+ : Align(8);
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ Modified |= rescheduleLoadStoreInstrs(&MBB);
+ }
+
+ return Modified;
+}
+
+RISCVPreAllocZilsdOpt::MemOffset
+RISCVPreAllocZilsdOpt::getMemoryOpOffset(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case RISCV::LW:
+ case RISCV::SW: {
+ // For LW/SW, the offset is in operand 2
+ const MachineOperand &OffsetOp = MI.getOperand(2);
+
+ // Handle immediate offset
+ if (OffsetOp.isImm())
+ return std::make_pair(MemoryOffsetKind::Imm, OffsetOp.getImm());
+
+ // Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
+ if (OffsetOp.getTargetFlags() & RISCVII::MO_LO) {
+ if (OffsetOp.isGlobal())
+ return std::make_pair(MemoryOffsetKind::Global, OffsetOp.getOffset());
+ if (OffsetOp.isCPI())
+ return std::make_pair(MemoryOffsetKind::CPI, OffsetOp.getOffset());
+ if (OffsetOp.isBlockAddress())
+ return std::make_pair(MemoryOffsetKind::BlockAddr,
+ OffsetOp.getOffset());
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ return std::make_pair(MemoryOffsetKind::Unknown, 0);
+}
+
+bool RISCVPreAllocZilsdOpt::canFormLdSdPair(MachineInstr *MI0,
+ MachineInstr *MI1) {
+ if (!MI0->hasOneMemOperand() || !MI1->hasOneMemOperand())
+ return false;
+
+ // Get offsets and check they are consecutive
+ int Offset0 = getMemoryOpOffset(*MI0).second;
+ int Offset1 = getMemoryOpOffset(*MI1).second;
+
+ // Offsets must be 4 bytes apart
+ if (Offset1 - Offset0 != 4)
+ return false;
+
+ // We need to guarantee that the alignment of (base + offset) is legal.
+ const MachineMemOperand *MMO = *MI0->memoperands_begin();
+ if (MMO->getAlign() < RequiredAlign)
+ return false;
+
+ // Check that the two destination/source registers are different for
+ // load/store respectively.
+ Register FirstReg = MI0->getOperand(0).getReg();
+ Register SecondReg = MI1->getOperand(0).getReg();
+ if (FirstReg == SecondReg)
+ return false;
+
+ return true;
+}
+
+bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
+ bool MoveForward) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator Start = MI->getIterator();
+ MachineBasicBlock::iterator End = Target->getIterator();
+
+ if (!MoveForward)
+ std::swap(Start, End);
+
+ // Increment Start to skip the current instruction
+ if (Start != MBB->end())
+ ++Start;
+
+ Register DefReg = MI->getOperand(0).getReg();
+ Register BaseReg = MI->getOperand(1).getReg();
+
+ unsigned ScanCount = 0;
+ for (auto It = Start; It != End; ++It, ++ScanCount) {
+ // Don't move across calls or terminators
+ if (It->isCall() || It->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "Cannot move across call/terminator: " << *It);
+ return false;
+ }
+
+ // Don't move across instructions with unmodeled side effects (e.g. memory
+ // barriers).
+ if (It->hasUnmodeledSideEffects()) {
+ LLVM_DEBUG(dbgs() << "Cannot move across instruction with side effects: "
+ << *It);
+ return false;
+ }
+
+ // Check if the base register is modified
+ if (It->modifiesRegister(BaseReg, TRI)) {
+ LLVM_DEBUG(dbgs() << "Base register " << BaseReg
+ << " modified by: " << *It);
+ return false;
+ }
+
+ // For loads, check if the loaded value is used
+ if (MI->mayLoad() &&
+ (It->readsRegister(DefReg, TRI) || It->modifiesRegister(DefReg, TRI))) {
+ LLVM_DEBUG(dbgs() << "Destination register " << DefReg
+ << " used by: " << *It);
+ return false;
+ }
+
+ // For stores, check if the stored register is modified
+ if (MI->mayStore() && It->modifiesRegister(DefReg, TRI)) {
+ LLVM_DEBUG(dbgs() << "Source register " << DefReg
+ << " modified by: " << *It);
+ return false;
+ }
+
+ // Check for memory operation interference
+ if (It->mayLoadOrStore() && It->mayAlias(AA, *MI, /*UseTBAA*/ false)) {
+ LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleOps(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &MIs,
+ BaseRegInfo Base, bool IsLoad,
+ DenseMap<MachineInstr *, unsigned> &MI2LocMap) {
+ // Sort by offset. At this point the base reg and MemoryOffsetKind are
+ // guaranteed to be the same, so we just need to sort by offset value.
+ llvm::sort(MIs.begin(), MIs.end(), [this](MachineInstr *A, MachineInstr *B) {
+ return getMemoryOpOffset(*A).second < getMemoryOpOffset(*B).second;
+ });
+
+ bool Modified = false;
+
+ // Try to pair consecutive operations
+ for (size_t i = 0; i + 1 < MIs.size(); i++) {
+ MachineInstr *MI0 = MIs[i];
+ MachineInstr *MI1 = MIs[i + 1];
+
+ Register FirstReg = MI0->getOperand(0).getReg();
+ Register SecondReg = MI1->getOperand(0).getReg();
+ Register BaseReg = MI0->getOperand(1).getReg();
+ const MachineOperand &OffsetOp = MI0->getOperand(2);
+
+ // At this point, MI0 and MI1 are:
+ // 1. both LW or both SW.
+ // 2. guaranteed to have the same memory kind.
+ // 3. guaranteed to have the same base register.
+ // 4. already sorted by offset value.
+ // so we don't have to check these in canFormLdSdPair.
+ if (!canFormLdSdPair(MI0, MI1))
+ continue;
+
+ // Use MI2LocMap to determine which instruction appears later in program
+ // order
+ bool MI1IsLater = MI2LocMap[MI1] > MI2LocMap[MI0];
+
+ // For loads: move later instruction up (backwards) to earlier instruction
+ // For stores: move earlier instruction down (forwards) to later instruction
+ MachineInstr *MoveInstr, *TargetInstr;
+ if (IsLoad) {
+ // For loads: move the later instruction to the earlier one
+ MoveInstr = MI1IsLater ? MI1 : MI0;
+ TargetInstr = MI1IsLater ? MI0 : MI1;
+ } else {
+ // For stores: move the earlier instruction to the later one
+ MoveInstr = MI1IsLater ? MI0 : MI1;
+ TargetInstr = MI1IsLater ? MI1 : MI0;
+ }
+
+ unsigned Distance = MI1IsLater ? MI2LocMap[MI1] - MI2LocMap[MI0]
+ : MI2LocMap[MI0] - MI2LocMap[MI1];
+ if (!isSafeToMove(MoveInstr, TargetInstr, !IsLoad) ||
+ Distance > MaxRescheduleDistance)
+ continue;
+
+ // Move the instruction to the target position
+ MachineBasicBlock::iterator InsertPos = TargetInstr->getIterator();
+ ++InsertPos;
+
+ // If we need to move an instruction, do it now
+ if (MoveInstr != TargetInstr)
+ MBB->splice(InsertPos, MBB, MoveInstr->getIterator());
+
+ // Create the paired instruction
+ MachineInstrBuilder MIB;
+ DebugLoc DL = MI0->getDebugLoc();
+
+ if (IsLoad) {
+ MIB = BuildMI(*MBB, InsertPos, DL, TII->get(RISCV::PseudoLD_RV32_OPT))
+ .addReg(FirstReg, RegState::Define)
+ .addReg(SecondReg, RegState::Define)
+ .addReg(BaseReg)
+ .add(OffsetOp);
+ ++NumLDFormed;
+ LLVM_DEBUG(dbgs() << "Formed LD: " << *MIB << "\n");
+ } else {
+ MIB = BuildMI(*MBB, InsertPos, DL, TII->get(RISCV::PseudoSD_RV32_OPT))
+ .addReg(FirstReg)
+ .addReg(SecondReg)
+ .addReg(BaseReg)
+ .add(OffsetOp);
+ ++NumSDFormed;
+ LLVM_DEBUG(dbgs() << "Formed SD: " << *MIB << "\n");
+ }
+
+ // Copy memory operands
+ MIB.cloneMergedMemRefs({MI0, MI1});
+
+ // Add register allocation hints for consecutive registers
+ // RISC-V Zilsd requires even/odd register pairs
+ // Only set hints for virtual registers (physical registers already have
+ // encoding)
+ if (FirstReg.isVirtual() && SecondReg.isVirtual()) {
+ // For virtual registers, we can't determine even/odd yet, but we can hint
+ // that they should be allocated as a consecutive pair
+ MRI->setRegAllocationHint(FirstReg, RISCVRI::RegPairEven, SecondReg);
+ MRI->setRegAllocationHint(SecondReg, RISCVRI::RegPairOdd, FirstReg);
+ }
+
+ // Remove the original instructions
+ MI0->eraseFromParent();
+ MI1->eraseFromParent();
+
+ Modified = true;
+
+ // Skip the next instruction since we've already processed it
+ i++;
+ }
+
+ return Modified;
+}
+
+bool RISCVPreAllocZilsdOpt::isMemoryOp(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != RISCV::LW && Opcode != RISCV::SW)
+ return false;
+
+ if (!MI.getOperand(1).isReg())
+ return false;
+
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ if (MMO->isVolatile() || MMO->isAtomic())
+ return false;
+
+ // sw <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI.getOperand(1).isUndef())
+ return false;
+
+ return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool Modified = false;
+
+ // Process the basic block in windows delimited by calls, terminators,
+ // or instructions with duplicate base+offset pairs
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+
+ while (MBBI != E) {
+ // Map from instruction to its location in the current window
+ DenseMap<MachineInstr *, unsigned> MI2LocMap;
+
+ // Map from base register to list of load/store instructions
+ using Base2InstMap = DenseMap<BaseRegInfo, SmallVector<MachineInstr *, 4>>;
+ using BaseVec = SmallVector<BaseRegInfo, 4>;
+ Base2InstMap Base2LdsMap;
+ Base2InstMap Base2StsMap;
+ BaseVec LdBases;
+ BaseVec StBases;
+
+ unsigned Loc = 0;
+
+ // Build the current window of instructions
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ // Stop at barriers (calls and terminators)
+ if (MI.isCall() || MI.isTerminator()) {
+ // Move past the barrier for next iteration
+ ++MBBI;
+ break;
+ }
+
+ // Track instruction location in window
+ if (!MI.isDebugInstr())
+ MI2LocMap[&MI] = ++Loc;
+
+ MemOffset Offset = getMemoryOpOffset(MI);
+ // Skip non-memory operations and offsets of unknown kind.
+ if (!isMemoryOp(MI) || Offset.first == MemoryOffsetKind::Unknown)
+ continue;
+
+ bool IsLd = (MI.getOpcode() == RISCV::LW);
+ Register Base = MI.getOperand(1).getReg();
+ bool StopHere = false;
+
+ // Lambda to find or add base register entries
+ auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
+ auto [BI, Inserted] = Base2Ops.try_emplace({Base.id(), Offset.first});
+ if (Inserted) {
+ // First time seeing this base register
+ BI->second.push_back(&MI);
+ Bases.push_back({Base.id(), Offset.first});
+ return;
+ }
+ // Check if we've seen this exact base+offset before
+ if (any_of(BI->second, [&](const MachineInstr *PrevMI) {
+ return Offset == getMemoryOpOffset(*PrevMI);
+ })) {
+ // Found duplicate base+offset - stop here to process current window
+ StopHere = true;
+ } else {
+ BI->second.push_back(&MI);
+ }
+ };
+
+ if (IsLd)
+ FindBases(Base2LdsMap, LdBases);
+ else
+ FindBases(Base2StsMap, StBases);
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that was seen earlier).
+ // Backtrack to process the current window.
+ --Loc;
+ break;
+ }
+ }
+
+ // Process the current window - reschedule loads
+ for (auto Base : LdBases) {
+ SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1) {
+ Modified |= rescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+ }
+
+ // Process the current window - reschedule stores
+ for (auto Base : StBases) {
+ SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1) {
+ Modified |= rescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+ }
+ }
+
+ return Modified;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass creation functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createRISCVPreAllocZilsdOptPass() {
+ return new RISCVPreAllocZilsdOpt();
+}
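A standalone sketch (hypothetical names) of the rescheduling direction chosen
in rescheduleOps above: loads hoist the later access up to the earlier one,
stores sink the earlier access down to the later one, and the move is
abandoned when the program-order distance exceeds
riscv-zilsd-max-reschedule-distance (default 10).

    #include <cstdlib>

    struct MovePlan {
      int MoveLoc;   // program-order index of the instruction to move
      int TargetLoc; // index of the instruction it is moved next to
      bool Profitable;
    };

    MovePlan planReschedule(int Loc0, int Loc1, bool IsLoad, unsigned MaxDist) {
      bool MI1IsLater = Loc1 > Loc0;
      int Later = MI1IsLater ? Loc1 : Loc0;
      int Earlier = MI1IsLater ? Loc0 : Loc1;
      // Loads: hoist the later access; stores: sink the earlier access.
      int Move = IsLoad ? Later : Earlier;
      int Target = IsLoad ? Earlier : Later;
      unsigned Dist = (unsigned)std::abs(Loc1 - Loc0);
      return {Move, Target, Dist <= MaxDist};
    }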
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 769823d1c4216..3e2de780524b6 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -126,6 +126,7 @@
; CHECK-NEXT: RISC-V Merge Base Offset
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: RISC-V VL Optimizer
+; CHECK-NEXT: RISC-V pre-allocation Zilsd load/store optimization
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V Landing Pad Setup
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 3d9906fdcbeb3..010d3c68b5ef1 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -297,6 +297,7 @@
; CHECK-NEXT: zihintpause - 'Zihintpause' (Pause Hint).
; CHECK-NEXT: zihpm - 'Zihpm' (Hardware Performance Counters).
; CHECK-NEXT: zilsd - 'Zilsd' (Load/Store Pair Instructions).
+; CHECK-NEXT: zilsd-4byte-align - Allow 4-byte alignment for Zilsd LD/SD instructions.
; CHECK-NEXT: zimop - 'Zimop' (May-Be-Operations).
; CHECK-NEXT: zk - 'Zk' (Standard scalar cryptography extension).
; CHECK-NEXT: zkn - 'Zkn' (NIST Algorithm Suite).
diff --git a/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir
new file mode 100644
index 0000000000000..c27cb25366f27
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir
@@ -0,0 +1,216 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# NOTE: Test expansion of PseudoLD_RV32_OPT/PseudoSD_RV32_OPT after register allocation
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass riscv-load-store-opt %s -o - | FileCheck %s
+--- |
+ define i32 @expand_pseudold_valid(ptr %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define void @expand_pseudosd_valid(ptr %0, i32 %1, i32 %2) {
+ store i32 %1, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 %2, ptr %4, align 4
+ ret void
+ }
+
+ define i32 @expand_pseudold_invalid_pair(ptr %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define void @expand_pseudosd_invalid_pair(ptr %0, i32 %1, i32 %2) {
+ store i32 %1, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 %2, ptr %4, align 4
+ ret void
+ }
+
+ define void @store_zero_combine_valid(ptr %0) {
+ store i32 0, ptr %0, align 8
+ %2 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 0, ptr %2, align 8
+ ret void
+ }
+
+ define void @store_zero_combine_invalid(ptr %0, i32 %1) {
+ store i32 %1, ptr %0, align 8
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 0, ptr %3, align 8
+ ret void
+ }
+
+ @global_array = external global [100 x i32]
+
+ define i32 @expand_pseudold_invalid_symbolic() {
+ ret i32 0
+ }
+
+ define i32 @overlapped_first_reg_base_reg() {
+ ret i32 0
+ }
+...
+---
+# Valid consecutive even/odd register pair - should expand to LD_RV32
+name: expand_pseudold_valid
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; PseudoLD_RV32_OPT with consecutive even/odd registers (x12, x13)
+ ; CHECK-LABEL: name: expand_pseudold_valid
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12_x13 = LD_RV32 killed $x10, 0
+ ; CHECK-NEXT: $x10 = ADD killed $x12, killed $x13
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ $x12, $x13 = PseudoLD_RV32_OPT killed $x10, 0
+ $x10 = ADD killed $x12, killed $x13
+ PseudoRET implicit $x10
+
+...
+---
+# Valid consecutive even/odd register pair - should expand to SD_RV32
+name: expand_pseudosd_valid
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+
+ ; PseudoSD_RV32_OPT with consecutive even/odd registers (x12, x13)
+ ; CHECK-LABEL: name: expand_pseudosd_valid
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SD_RV32 killed $x12_x13, killed $x10, 0
+ ; CHECK-NEXT: PseudoRET
+ PseudoSD_RV32_OPT killed $x12, killed $x13, killed $x10, 0
+ PseudoRET
+
+...
+---
+# Invalid register pair (not consecutive) - should decompose back to LW
+name: expand_pseudold_invalid_pair
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; PseudoLD_RV32_OPT with non-consecutive registers (x11, x13)
+ ; Should decompose back to two LW instructions
+ ; CHECK-LABEL: name: expand_pseudold_invalid_pair
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x11 = LW $x10, 0
+ ; CHECK-NEXT: $x13 = LW killed $x10, 4
+ ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x13
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ $x11, $x13 = PseudoLD_RV32_OPT killed $x10, 0
+ $x10 = ADD killed $x11, killed $x13
+ PseudoRET implicit $x10
+
+...
+---
+# Invalid register pair (not even/odd) - should decompose back to SW
+name: expand_pseudosd_invalid_pair
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x14
+
+ ; PseudoSD_RV32_OPT with non-consecutive registers (x11, x14)
+ ; Should decompose back to two SW instructions
+ ; CHECK-LABEL: name: expand_pseudosd_invalid_pair
+ ; CHECK: liveins: $x10, $x11, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW killed $x11, $x10, 0
+ ; CHECK-NEXT: SW killed $x14, killed $x10, 4
+ ; CHECK-NEXT: PseudoRET
+ PseudoSD_RV32_OPT killed $x11, killed $x14, killed $x10, 0
+ PseudoRET
+
+...
+---
+# Test store zero combinations - zeros don't need consecutive pairs
+name: store_zero_combine_valid
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: store_zero_combine_valid
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SD_RV32 $x0_pair, killed $x10, 0
+ ; CHECK-NEXT: PseudoRET
+ PseudoSD_RV32_OPT $x0, $x0, killed $x10, 0
+ PseudoRET
+
+...
+---
+# Test invalid store zero combination - x0 can't be the first register unless
+# both registers are x0
+name: store_zero_combine_invalid
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: store_zero_combine_invalid
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW killed $x0, $x10, 0
+ ; CHECK-NEXT: SW killed $x1, killed $x10, 4
+ ; CHECK-NEXT: PseudoRET
+ PseudoSD_RV32_OPT killed $x0, killed $x1, killed $x10, 0
+ PseudoRET
+
+...
+---
+# Test invalid register pair with symbolic operands - should split back to LW
+name: expand_pseudold_invalid_symbolic
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; PseudoLD_RV32_OPT with symbolic operand and non-consecutive registers (x11, x14)
+ ; Should decompose back to two LW instructions preserving symbolic references
+ ; CHECK-LABEL: name: expand_pseudold_invalid_symbolic
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x11 = LW $x10, target-flags(riscv-lo) @global_array
+ ; CHECK-NEXT: $x14 = LW killed $x10, target-flags(riscv-lo) @global_array + 4
+ ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x14
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ $x11, $x14 = PseudoLD_RV32_OPT killed $x10, target-flags(riscv-lo) @global_array
+ $x10 = ADD killed $x11, killed $x14
+ PseudoRET implicit $x10
+
+...
+---
+# Test overlapped first reg and base reg - should split back to LW
+name: overlapped_first_reg_base_reg
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; PseudoLD_RV32_OPT with first destination register overlapping base register ($x11)
+ ; Should decompose back to two LW instructions when first reg overlaps base reg
+ ; CHECK-LABEL: name: overlapped_first_reg_base_reg
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x14 = LW $x11, 4
+ ; CHECK-NEXT: $x11 = LW killed $x11, 0
+ ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x14
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ $x11, $x14 = PseudoLD_RV32_OPT killed $x11, 0
+ $x10 = ADD killed $x11, killed $x14
+ PseudoRET implicit $x10
+
+...
diff --git a/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
new file mode 100644
index 0000000000000..dab394d4bc8c4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
@@ -0,0 +1,1242 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass riscv-prera-zilsd-opt %s -o - | FileCheck %s
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd,+zilsd-4byte-align -run-pass riscv-prera-zilsd-opt %s -o - | FileCheck %s --check-prefix=CHECK-4BYTE
+--- |
+ declare void @external_func()
+
+ define i32 @basic_load_combine(ptr %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define void @basic_store_combine(ptr %0, i32 %1, i32 %2) {
+ store i32 %1, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 %2, ptr %4, align 4
+ ret void
+ }
+
+ define i32 @basic_load_combine_8_byte_aligned(ptr %0) {
+ %2 = load i32, ptr %0, align 8
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define void @basic_store_combine_8_byte_aligned(ptr %0, i32 %1, i32 %2) {
+ store i32 %1, ptr %0, align 8
+ %4 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 %2, ptr %4, align 4
+ ret void
+ }
+
+
+ define i32 @non_consecutive_offsets(ptr %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = getelementptr inbounds i32, ptr %0, i32 2
+ %4 = load i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define i32 @different_base_regs(ptr %0, ptr %1) {
+ %3 = load i32, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %1, i32 1
+ %5 = load i32, ptr %4, align 4
+ %6 = add i32 %3, %5
+ ret i32 %6
+ }
+
+ define i32 @call_blocks_optimization(ptr %0) {
+ %2 = load i32, ptr %0, align 4
+ call void @external_func()
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define i32 @terminator_blocks_optimization(ptr %0, i32 %1) {
+ %3 = load i32, ptr %0, align 4
+ %4 = icmp eq i32 %1, %3
+ br i1 %4, label %5, label %5
+ 5:
+ %6 = getelementptr inbounds i32, ptr %0, i32 1
+ %7 = load i32, ptr %6, align 4
+ %8 = add i32 %3, %7
+ ret i32 %8
+ }
+
+ define i32 @memory_aliasing(ptr %0, i32 %1) {
+ %3 = load i32, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 2
+ store i32 %1, ptr %4, align 4
+ %5 = getelementptr inbounds i32, ptr %0, i32 1
+ %6 = load i32, ptr %5, align 4
+ %7 = add i32 %3, %6
+ ret i32 %7
+ }
+
+ define i32 @multiple_pairs(ptr %0, ptr %1) {
+ %3 = load i32, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 1
+ %5 = load i32, ptr %4, align 4
+ %6 = add i32 %3, %5
+ %7 = getelementptr inbounds i32, ptr %1, i32 2
+ %8 = load i32, ptr %7, align 4
+ %9 = getelementptr inbounds i32, ptr %1, i32 3
+ %10 = load i32, ptr %9, align 4
+ %11 = add i32 %8, %10
+ %12 = add i32 %6, %11
+ ret i32 %12
+ }
+
+ define i32 @many_loads(ptr %0) {
+ %2 = getelementptr inbounds i32, ptr %0, i32 4
+ %3 = load i32, ptr %2, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 5
+ %5 = load i32, ptr %4, align 4
+ %6 = getelementptr inbounds i32, ptr %0, i32 6
+ %7 = load i32, ptr %6, align 4
+ %8 = getelementptr inbounds i32, ptr %0, i32 7
+ %9 = load i32, ptr %8, align 4
+ %10 = getelementptr inbounds i32, ptr %0, i32 8
+ %11 = load i32, ptr %10, align 4
+ %12 = getelementptr inbounds i32, ptr %0, i32 9
+ %13 = load i32, ptr %12, align 4
+ %14 = getelementptr inbounds i32, ptr %0, i32 10
+ %15 = load i32, ptr %14, align 4
+ %16 = getelementptr inbounds i32, ptr %0, i32 11
+ %17 = load i32, ptr %16, align 4
+ %18 = getelementptr inbounds i32, ptr %0, i32 12
+ %19 = load i32, ptr %18, align 4
+ %20 = getelementptr inbounds i32, ptr %0, i32 13
+ %21 = load i32, ptr %20, align 4
+ %22 = getelementptr inbounds i32, ptr %0, i32 14
+ %23 = load i32, ptr %22, align 4
+ %24 = getelementptr inbounds i32, ptr %0, i32 15
+ %25 = load i32, ptr %24, align 4
+ %26 = load i32, ptr %0, align 4
+ %27 = getelementptr inbounds i32, ptr %0, i32 1
+ %28 = load i32, ptr %27, align 4
+ %29 = add i32 %3, %5
+ %30 = add i32 %7, %9
+ %31 = add i32 %11, %13
+ %32 = add i32 %15, %17
+ %33 = add i32 %19, %21
+ %34 = add i32 %23, %25
+ %35 = add i32 %26, %28
+ %36 = add i32 %29, %30
+ %37 = add i32 %31, %32
+ %38 = add i32 %33, %34
+ %39 = add i32 %35, %36
+ %40 = add i32 %37, %38
+ %41 = add i32 %39, %40
+ ret i32 %41
+ }
+
+ define i32 @reverse_order_loads(ptr %0) {
+ %2 = getelementptr inbounds i32, ptr %0, i32 1
+ %3 = load i32, ptr %2, align 4
+ %4 = load i32, ptr %0, align 4
+ %5 = add i32 %3, %4
+ ret i32 %5
+ }
+
+ define i32 @offset_calculation(ptr %0) {
+ %2 = getelementptr inbounds i8, ptr %0, i32 100
+ %3 = load i32, ptr %2, align 4
+ %4 = getelementptr inbounds i8, ptr %0, i32 104
+ %5 = load i32, ptr %4, align 4
+ %6 = add i32 %3, %5
+ ret i32 %6
+ }
+
+ define i32 @large_offsets(ptr %0) {
+ %2 = getelementptr inbounds i8, ptr %0, i32 2040
+ %3 = load i32, ptr %2, align 4
+ %4 = getelementptr inbounds i8, ptr %0, i32 2044
+ %5 = load i32, ptr %4, align 4
+ %6 = add i32 %3, %5
+ ret i32 %6
+ }
+
+ define i32 @negative_offsets(ptr %0) {
+ %2 = getelementptr inbounds i8, ptr %0, i32 -8
+ %3 = load i32, ptr %2, align 4
+ %4 = getelementptr inbounds i8, ptr %0, i32 -4
+ %5 = load i32, ptr %4, align 4
+ %6 = add i32 %3, %5
+ ret i32 %6
+ }
+
+ define i32 @volatile_loads(ptr %0) {
+ %2 = load volatile i32, ptr %0, align 4
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load volatile i32, ptr %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+ }
+
+ define i32 @store_dependency(ptr %0, i32 %1) {
+ %3 = load i32, ptr %0, align 4
+ %4 = getelementptr inbounds i32, ptr %0, i32 1
+ store i32 %1, ptr %4, align 4
+ %5 = load i32, ptr %4, align 4
+ %6 = add i32 %3, %5
+ ret i32 %6
+ }
+
+ define i32 @three_loads(ptr %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = getelementptr inbounds i32, ptr %0, i32 1
+ %4 = load i32, ptr %3, align 4
+ %5 = getelementptr inbounds i32, ptr %0, i32 2
+ %6 = load i32, ptr %5, align 4
+ %7 = add i32 %2, %4
+ %8 = add i32 %7, %6
+ ret i32 %8
+ }
+
+ define i32 @distance_exceeds_max(ptr %0, i32 %1) {
+ %3 = load i32, ptr %0, align 4
+ %4 = add i32 %3, %1
+ %5 = add i32 %4, %1
+ %6 = add i32 %5, %1
+ %7 = add i32 %6, %1
+ %8 = add i32 %7, %1
+ %9 = add i32 %8, %1
+ %10 = add i32 %9, %1
+ %11 = add i32 %10, %1
+ %12 = add i32 %11, %1
+ %13 = add i32 %12, %1
+ %14 = add i32 %13, %1
+ %15 = getelementptr inbounds i32, ptr %0, i32 1
+ %16 = load i32, ptr %15, align 4
+ %17 = add i32 %14, %16
+ ret i32 %17
+ }
+
+ @global_var = external global [100 x i32]
+
+ define i32 @symbolic_operands_global() {
+ ret i32 0
+ }
+
+ define i32 @symbolic_operands_different_globals() {
+ ret i32 0
+ }
+
+ define i32 @symbolic_operands_constantpool() {
+ ret i32 0
+ }
+
+ define i32 @symbolic_operands_interleave() {
+ ret i32 0
+ }
+---
+# Basic case: two consecutive 32-bit loads that can be combined into LD
+name: basic_load_combine
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: basic_load_combine
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: basic_load_combine
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load (s32))
+ %2:gpr = LW %0, 4 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Basic case: two consecutive 32-bit stores that can be combined into SD
+name: basic_store_combine
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+ - { reg: '$x12', virtual-reg: '%2' }
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: basic_store_combine
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: SW [[COPY1]], [[COPY]], 0 :: (store (s32))
+ ; CHECK-NEXT: SW [[COPY2]], [[COPY]], 4 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: basic_store_combine
+ ; CHECK-4BYTE: liveins: $x10, $x11, $x12
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-4BYTE-NEXT: PseudoSD_RV32_OPT [[COPY1]], [[COPY2]], [[COPY]], 0 :: (store (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ %2:gpr = COPY $x12
+ SW %1, %0, 0 :: (store (s32))
+ SW %2, %0, 4 :: (store (s32))
+ PseudoRET
+
+...
+---
+name: basic_load_combine_8_byte_aligned
+alignment: 8
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: basic_load_combine_8_byte_aligned
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32), align 8), (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: basic_load_combine_8_byte_aligned
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32), align 8), (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load (s32), align 8)
+ %2:gpr = LW %0, 4 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Basic case: two consecutive 32-bit stores that can be combined into SD
+name: basic_store_combine_8_byte_aligned
+alignment: 8
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+ - { reg: '$x12', virtual-reg: '%2' }
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: basic_store_combine_8_byte_aligned
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: PseudoSD_RV32_OPT [[COPY1]], [[COPY2]], [[COPY]], 0 :: (store (s32), align 8), (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: basic_store_combine_8_byte_aligned
+ ; CHECK-4BYTE: liveins: $x10, $x11, $x12
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-4BYTE-NEXT: PseudoSD_RV32_OPT [[COPY1]], [[COPY2]], [[COPY]], 0 :: (store (s32), align 8), (store (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ %2:gpr = COPY $x12
+ SW %1, %0, 0 :: (store (s32), align 8)
+ SW %2, %0, 4 :: (store (s32))
+ PseudoRET
+
+...
+---
+# Non-consecutive offsets - should not combine
+name: non_consecutive_offsets
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: non_consecutive_offsets
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: non_consecutive_offsets
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Should not combine - offset gap is 8, not 4
+ %1:gpr = LW %0, 0 :: (load (s32))
+ %2:gpr = LW %0, 8 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Different base registers - should not combine
+name: different_base_regs
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: different_base_regs
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY1]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: different_base_regs
+ ; CHECK-4BYTE: liveins: $x10, $x11
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY1]], 4 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ ; Should not combine - different base registers
+ %2:gpr = LW %0, 0 :: (load (s32))
+ %3:gpr = LW %1, 4 :: (load (s32))
+ %4:gpr = ADD %2, %3
+ PseudoRET
+
+...
+---
+# Call instruction blocks optimization
+name: call_blocks_optimization
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: call_blocks_optimization
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) @external_func, csr_ilp32_lp64, implicit-def $x1
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: call_blocks_optimization
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: PseudoCALL target-flags(riscv-call) @external_func, csr_ilp32_lp64, implicit-def $x1
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load (s32))
+ ; Call instruction should block combining across it
+ PseudoCALL target-flags(riscv-call) @external_func, csr_ilp32_lp64, implicit-def $x1
+ %2:gpr = LW %0, 4 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Terminator instruction blocks optimization
+name: terminator_blocks_optimization
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+body: |
+ ; CHECK-LABEL: name: terminator_blocks_optimization
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: BEQ [[COPY1]], [[LW]], %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: terminator_blocks_optimization
+ ; CHECK-4BYTE: bb.0:
+ ; CHECK-4BYTE-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-4BYTE-NEXT: liveins: $x10, $x11
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: BEQ [[COPY1]], [[LW]], %bb.1
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: bb.1:
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ bb.0:
+ liveins: $x10, $x11
+
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ %2:gpr = LW %0, 0 :: (load (s32))
+ BEQ %1, %2, %bb.1
+
+ bb.1:
+ ; Should not combine across basic block boundary
+ %3:gpr = LW %0, 4 :: (load (s32))
+ %4:gpr = ADD %2, %3
+ PseudoRET
+
+...
+---
+# Memory aliasing - store between loads with unknown base should prevent combining
+name: memory_aliasing
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+ - { reg: '$x12', virtual-reg: '%2' }
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: memory_aliasing
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: SW [[COPY1]], [[COPY2]], 8 :: (store (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: memory_aliasing
+ ; CHECK-4BYTE: liveins: $x10, $x11, $x12
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: SW [[COPY1]], [[COPY2]], 8 :: (store (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ %2:gpr = COPY $x12
+ %3:gpr = LW %0, 0 :: (load (s32))
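+ ; Intervening store through an unrelated base may alias the loads - blocks combining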
+ SW %1, %2, 8 :: (store (s32))
+ %4:gpr = LW %0, 4 :: (load (s32))
+ %5:gpr = ADD %3, %4
+ PseudoRET
+
+...
+---
+# Multiple pairs in same function
+name: multiple_pairs
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: multiple_pairs
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY1]], 8 :: (load (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY1]], 12 :: (load (s32))
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[LW2]], [[LW3]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: multiple_pairs
+ ; CHECK-4BYTE: liveins: $x10, $x11
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY1]], 8 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT2]], [[PseudoLD_RV32_OPT3]]
+ ; CHECK-4BYTE-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ ; First pair should combine
+ %2:gpr = LW %0, 0 :: (load (s32))
+ %3:gpr = LW %0, 4 :: (load (s32))
+ %4:gpr = ADD %2, %3
+
+ ; Second pair should also combine
+ %5:gpr = LW %1, 8 :: (load (s32))
+ %6:gpr = LW %1, 12 :: (load (s32))
+ %7:gpr = ADD %5, %6
+ %8:gpr = ADD %4, %7
+ PseudoRET
+
+...
+---
+# Many loads test
+name: many_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: many_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 16 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 20 :: (load (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 24 :: (load (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 28 :: (load (s32))
+ ; CHECK-NEXT: [[LW4:%[0-9]+]]:gpr = LW [[COPY]], 32 :: (load (s32))
+ ; CHECK-NEXT: [[LW5:%[0-9]+]]:gpr = LW [[COPY]], 36 :: (load (s32))
+ ; CHECK-NEXT: [[LW6:%[0-9]+]]:gpr = LW [[COPY]], 40 :: (load (s32))
+ ; CHECK-NEXT: [[LW7:%[0-9]+]]:gpr = LW [[COPY]], 44 :: (load (s32))
+ ; CHECK-NEXT: [[LW8:%[0-9]+]]:gpr = LW [[COPY]], 48 :: (load (s32))
+ ; CHECK-NEXT: [[LW9:%[0-9]+]]:gpr = LW [[COPY]], 52 :: (load (s32))
+ ; CHECK-NEXT: [[LW10:%[0-9]+]]:gpr = LW [[COPY]], 56 :: (load (s32))
+ ; CHECK-NEXT: [[LW11:%[0-9]+]]:gpr = LW [[COPY]], 60 :: (load (s32))
+ ; CHECK-NEXT: [[LW12:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW13:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[LW2]], [[LW3]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[LW4]], [[LW5]]
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[LW6]], [[LW7]]
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[LW8]], [[LW9]]
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[LW10]], [[LW11]]
+ ; CHECK-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[LW12]], [[LW13]]
+ ; CHECK-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+ ; CHECK-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD2]], [[ADD3]]
+ ; CHECK-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD4]], [[ADD5]]
+ ; CHECK-NEXT: [[ADD10:%[0-9]+]]:gpr = ADD [[ADD6]], [[ADD7]]
+ ; CHECK-NEXT: [[ADD11:%[0-9]+]]:gpr = ADD [[ADD8]], [[ADD9]]
+ ; CHECK-NEXT: [[ADD12:%[0-9]+]]:gpr = ADD [[ADD10]], [[ADD11]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: many_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 16 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 24 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT4:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT5:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 32 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT6:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT7:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 40 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT8:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT9:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 48 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT10:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT11:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 56 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT12:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT13:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT2]], [[PseudoLD_RV32_OPT3]]
+ ; CHECK-4BYTE-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT4]], [[PseudoLD_RV32_OPT5]]
+ ; CHECK-4BYTE-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT6]], [[PseudoLD_RV32_OPT7]]
+ ; CHECK-4BYTE-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT8]], [[PseudoLD_RV32_OPT9]]
+ ; CHECK-4BYTE-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT10]], [[PseudoLD_RV32_OPT11]]
+ ; CHECK-4BYTE-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT12]], [[PseudoLD_RV32_OPT13]]
+ ; CHECK-4BYTE-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+ ; CHECK-4BYTE-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD2]], [[ADD3]]
+ ; CHECK-4BYTE-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD4]], [[ADD5]]
+ ; CHECK-4BYTE-NEXT: [[ADD10:%[0-9]+]]:gpr = ADD [[ADD6]], [[ADD7]]
+ ; CHECK-4BYTE-NEXT: [[ADD11:%[0-9]+]]:gpr = ADD [[ADD8]], [[ADD9]]
+ ; CHECK-4BYTE-NEXT: [[ADD12:%[0-9]+]]:gpr = ADD [[ADD10]], [[ADD11]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Create high register pressure with many live values
+ %1:gpr = LW %0, 16 :: (load (s32))
+ %2:gpr = LW %0, 20 :: (load (s32))
+ %3:gpr = LW %0, 24 :: (load (s32))
+ %4:gpr = LW %0, 28 :: (load (s32))
+ %5:gpr = LW %0, 32 :: (load (s32))
+ %6:gpr = LW %0, 36 :: (load (s32))
+ %7:gpr = LW %0, 40 :: (load (s32))
+ %8:gpr = LW %0, 44 :: (load (s32))
+ %9:gpr = LW %0, 48 :: (load (s32))
+ %10:gpr = LW %0, 52 :: (load (s32))
+ %11:gpr = LW %0, 56 :: (load (s32))
+ %12:gpr = LW %0, 60 :: (load (s32))
+
+ ; With high register pressure, these loads might not be combined
+ ; depending on the profitability analysis
+ %13:gpr = LW %0, 0 :: (load (s32))
+ %14:gpr = LW %0, 4 :: (load (s32))
+
+ ; Use all the loaded values to keep them live
+ %15:gpr = ADD %1, %2
+ %16:gpr = ADD %3, %4
+ %17:gpr = ADD %5, %6
+ %18:gpr = ADD %7, %8
+ %19:gpr = ADD %9, %10
+ %20:gpr = ADD %11, %12
+ %21:gpr = ADD %13, %14
+ %22:gpr = ADD %15, %16
+ %23:gpr = ADD %17, %18
+ %24:gpr = ADD %19, %20
+ %25:gpr = ADD %21, %22
+ %26:gpr = ADD %23, %24
+ %27:gpr = ADD %25, %26
+ PseudoRET
+
+...
+---
+# Test reverse order - second load has lower offset than first
+name: reverse_order_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: reverse_order_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: reverse_order_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT1]], [[PseudoLD_RV32_OPT]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Load at higher offset first, then lower offset
+ ; Should still be combined into an LD anchored at the lower offset (with zilsd-4byte-align)
+ %1:gpr = LW %0, 4 :: (load (s32))
+ %2:gpr = LW %0, 0 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test with immediate offset calculation
+name: offset_calculation
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: offset_calculation
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 100 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 104 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: offset_calculation
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 100 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Test with different immediate values that are consecutive
+ %1:gpr = LW %0, 100 :: (load (s32))
+ %2:gpr = LW %0, 104 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test large offset values
+name: large_offsets
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: large_offsets
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 2040 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 2044 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: large_offsets
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 2040 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Test with large offset values
+ %1:gpr = LW %0, 2040 :: (load (s32))
+ %2:gpr = LW %0, 2044 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test with negative offsets
+name: negative_offsets
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: negative_offsets
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], -8 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], -4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: negative_offsets
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], -8 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Test with negative consecutive offsets
+ %1:gpr = LW %0, -8 :: (load (s32))
+ %2:gpr = LW %0, -4 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test with volatile loads - should not combine
+name: volatile_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: volatile_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: volatile_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Volatile loads should not be combined
+ %1:gpr = LW %0, 0 :: (volatile load (s32))
+ %2:gpr = LW %0, 4 :: (volatile load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test store dependency - store modifies same location as load
+name: store_dependency
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: store_dependency
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: SW [[COPY1]], [[COPY]], 4 :: (store (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: store_dependency
+ ; CHECK-4BYTE: liveins: $x10, $x11
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: SW [[COPY1]], [[COPY]], 4 :: (store (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ %2:gpr = LW %0, 0 :: (load (s32))
+ ; Store to same location as second load - should prevent combination
+ SW %1, %0, 4 :: (store (s32))
+ %3:gpr = LW %0, 4 :: (load (s32))
+ %4:gpr = ADD %2, %3
+ PseudoRET
+
+...
+---
+# Test three loads:
+# default (8-byte alignment required): none combine
+# zilsd-4byte-align: first two combine, third stays separate
+name: three_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: three_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[LW2]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: three_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[LW]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; With 4-byte alignment allowed, the first two loads combine and the third remains separate
+ %1:gpr = LW %0, 0 :: (load (s32))
+ %2:gpr = LW %0, 4 :: (load (s32))
+ %3:gpr = LW %0, 8 :: (load (s32))
+ %4:gpr = ADD %1, %2
+ %5:gpr = ADD %4, %3
+ PseudoRET
+...
+---
+# Test where distance between loads exceeds MaxRescheduleDistance
+name: distance_exceeds_max
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: distance_exceeds_max
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD1]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[ADD2]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[ADD3]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[ADD4]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[ADD5]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD6]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD7]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD8]], [[COPY1]]
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: distance_exceeds_max
+ ; CHECK-4BYTE: liveins: $x10, $x11
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD1]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[ADD2]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[ADD3]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[ADD4]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[ADD5]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD6]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD7]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD8]], [[COPY1]]
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
+ %2:gpr = LW %0, 0 :: (load (s32))
+ ; Insert 10 ADD instructions between the two loads
+ ; This makes the distance greater than the configured MaxRescheduleDistance
+ %3:gpr = ADD %2, %1
+ %4:gpr = ADD %3, %1
+ %5:gpr = ADD %4, %1
+ %6:gpr = ADD %5, %1
+ %7:gpr = ADD %6, %1
+ %8:gpr = ADD %7, %1
+ %9:gpr = ADD %8, %1
+ %10:gpr = ADD %9, %1
+ %11:gpr = ADD %10, %1
+ %12:gpr = ADD %11, %1
+ ; Second load at offset 4 - too far from first load
+ %14:gpr = LW %0, 4 :: (load (s32))
+ PseudoRET
+...
+---
+# Test combining loads with symbolic operands (global address)
+name: symbolic_operands_global
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: symbolic_operands_global
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var + 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: symbolic_operands_global
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Two consecutive loads with symbolic global address operands
+ %1:gpr = LW %0, target-flags(riscv-lo) @global_var :: (load (s32))
+ %2:gpr = LW %0, target-flags(riscv-lo) @global_var + 4 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test that loads with different global symbols are not combined
+name: symbolic_operands_different_globals
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+stack:
+ - { id: 0, offset: -4, size: 4 }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: symbolic_operands_different_globals
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], %stack.0 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: symbolic_operands_different_globals
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], %stack.0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Should not combine - different symbol types
+ %1:gpr = LW %0, target-flags(riscv-lo) @global_var :: (load (s32))
+ %2:gpr = LW %0, %stack.0 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
+# Test combining loads with constant pool operands
+name: symbolic_operands_constantpool
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+constants:
+ - id: 0
+ value: 'double 3.140000e+00'
+ alignment: 8
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: symbolic_operands_constantpool
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) %const.0 :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) %const.0 + 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: symbolic_operands_constantpool
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], target-flags(riscv-lo) %const.0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ ; Two consecutive loads with constant pool operands
+ %1:gpr = LW %0, target-flags(riscv-lo) %const.0 :: (load (s32))
+ %2:gpr = LW %0, target-flags(riscv-lo) %const.0 + 4 :: (load (s32))
+ %3:gpr = ADD %1, %2
+ PseudoRET
+
+...
+---
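+# Interleaved symbolic and immediate-offset loads - each pair is matched and rescheduled separately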
+name: symbolic_operands_interleave
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x10, $x11
+
+ ; CHECK-LABEL: name: symbolic_operands_interleave
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var + 4 :: (load (s32))
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW2]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[LW1]], [[LW3]]
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: symbolic_operands_interleave
+ ; CHECK-4BYTE: liveins: $x10, $x11
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT2]]
+ ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT3]], [[PseudoLD_RV32_OPT1]]
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = COPY $x11
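+ ; With 4-byte alignment allowed, the @global_var pair and the 0/4 pair each combine despite interleaving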
+ %2:gpr = LW %0, target-flags(riscv-lo) @global_var :: (load (s32))
+ %3:gpr = LW %0, 4 :: (load (s32))
+ %4:gpr = LW %0, 0 :: (load (s32))
+ %5:gpr = LW %0, target-flags(riscv-lo) @global_var + 4 :: (load (s32))
+ %6:gpr = ADD %2, %4
+ %7:gpr = ADD %3, %5
+ PseudoRET
+
+...
diff --git a/llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir b/llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir
new file mode 100644
index 0000000000000..b3861c3098cea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass=greedy,virtregrewriter %s -o - | FileCheck --check-prefix=WITHOUT-HINT %s
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd,+zilsd-4byte-align -run-pass=riscv-prera-zilsd-opt,greedy,virtregrewriter %s -o - | FileCheck --check-prefix=WITH-HINT %s
+
+--- |
+ define i32 @test_load_pair_hints(ptr %p) {
+ %v1 = load i32, ptr %p, align 4
+ %p2 = getelementptr inbounds i8, ptr %p, i32 4
+ %v2 = load i32, ptr %p2, align 4
+ %sum = add i32 %v1, %v2
+ ret i32 %sum
+ }
+
+ define void @test_store_pair_hints(ptr %p, i32 %a, i32 %b) {
+ store i32 %a, ptr %p, align 4
+ %p2 = getelementptr inbounds i8, ptr %p, i32 4
+ store i32 %b, ptr %p2, align 4
+ ret void
+ }
+...
+---
+# Test that load pairs get register hints and allocate consecutive registers
+# After register allocation, should either keep LD_RV32 or split back to LW
+name: test_load_pair_hints
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; WITHOUT-HINT-LABEL: name: test_load_pair_hints
+ ; WITHOUT-HINT: liveins: $x10
+ ; WITHOUT-HINT-NEXT: {{ $}}
+ ; WITHOUT-HINT-NEXT: renamable $x11 = LW renamable $x10, 0 :: (load (s32) from %ir.p)
+ ; WITHOUT-HINT-NEXT: renamable $x10 = LW killed renamable $x10, 4 :: (load (s32) from %ir.p2)
+ ; WITHOUT-HINT-NEXT: renamable $x10 = ADD killed renamable $x11, killed renamable $x10
+ ; WITHOUT-HINT-NEXT: PseudoRET implicit $x10
+ ;
+ ; WITH-HINT-LABEL: name: test_load_pair_hints
+ ; WITH-HINT: liveins: $x10
+ ; WITH-HINT-NEXT: {{ $}}
+ ; WITH-HINT-NEXT: renamable $x10, renamable $x11 = PseudoLD_RV32_OPT killed renamable $x10, 0 :: (load (s32) from %ir.p), (load (s32) from %ir.p2)
+ ; WITH-HINT-NEXT: renamable $x10 = ADD killed renamable $x10, killed renamable $x11
+ ; WITH-HINT-NEXT: PseudoRET implicit $x10
+ %10:gpr = COPY $x10
+ ; These two LW instructions at offset 0 and 4 should be combined
+ %0:gpr = LW %10, 0 :: (load (s32) from %ir.p)
+ %1:gpr = LW %10, 4 :: (load (s32) from %ir.p2)
+ %2:gpr = ADD %0, %1
+ $x10 = COPY %2
+ PseudoRET implicit $x10
+
+...
+---
+# Test that store pairs get register hints and allocate consecutive registers
+# After register allocation, should either keep SD_RV32 or split back to SW
+name: test_store_pair_hints
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; WITHOUT-HINT-LABEL: name: test_store_pair_hints
+ ; WITHOUT-HINT: liveins: $x10, $x11, $x12
+ ; WITHOUT-HINT-NEXT: {{ $}}
+ ; WITHOUT-HINT-NEXT: SW killed renamable $x11, renamable $x10, 0 :: (store (s32) into %ir.p)
+ ; WITHOUT-HINT-NEXT: SW killed renamable $x12, killed renamable $x10, 4 :: (store (s32) into %ir.p2)
+ ; WITHOUT-HINT-NEXT: PseudoRET
+ ;
+ ; WITH-HINT-LABEL: name: test_store_pair_hints
+ ; WITH-HINT: liveins: $x10, $x11, $x12
+ ; WITH-HINT-NEXT: {{ $}}
+ ; WITH-HINT-NEXT: renamable $x14 = COPY $x11
+ ; WITH-HINT-NEXT: renamable $x15 = COPY $x12
+ ; WITH-HINT-NEXT: PseudoSD_RV32_OPT killed renamable $x14, killed renamable $x15, killed renamable $x10, 0 :: (store (s32) into %ir.p), (store (s32) into %ir.p2)
+ ; WITH-HINT-NEXT: PseudoRET
+ %10:gpr = COPY $x10
+ %11:gpr = COPY $x11
+ %12:gpr = COPY $x12
+ ; These two SW instructions at offset 0 and 4 should be combined
+ SW %11, %10, 0 :: (store (s32) into %ir.p)
+ SW %12, %10, 4 :: (store (s32) into %ir.p2)
+ PseudoRET
+...