[llvm] 5048a08 - [RISCV] Generate MIPS load/store pair instructions (#124717)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 00:21:40 PST 2025
Author: Djordje Todorovic
Date: 2025-03-07T09:21:36+01:00
New Revision: 5048a0858beb15bdd203dee89dd4df9b2a72ba5f
URL: https://github.com/llvm/llvm-project/commit/5048a0858beb15bdd203dee89dd4df9b2a72ba5f
DIFF: https://github.com/llvm/llvm-project/commit/5048a0858beb15bdd203dee89dd4df9b2a72ba5f.diff
LOG: [RISCV] Generate MIPS load/store pair instructions (#124717)
Introduce the RISCVLoadStoreOptimizer MIR pass, which performs the
optimization. The load/store pairing pass identifies adjacent load/store
instructions operating on consecutive memory locations and merges them
into a single paired instruction.
This is part of the MIPS extensions for the p8700 CPU.
Generation of ldp/sdp instructions is OFF by default, since it is
beneficial only at -Os and only on the p8700 CPU.
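For illustration (a sketch based on the RUN lines and CHECK output of the
new load-store-pair.ll test below), running llc with -mattr=+Xmipslsp and
-use-riscv-mips-load-store-pairs=1 merges two adjacent word loads such as
  lw s3, 0(a0)
  lw s2, 4(a0)
into a single paired instruction:
  mips.lwp s3, s2, 0(a0)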
Added:
llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
llvm/test/CodeGen/RISCV/load-store-pair.ll
Modified:
llvm/lib/Target/RISCV/CMakeLists.txt
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/RISCV/RISCVSubtarget.cpp
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/O3-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 5d1ea50eba494..e8d00f4df7c86 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_target(RISCVCodeGen
RISCVISelDAGToDAG.cpp
RISCVISelLowering.cpp
RISCVLandingPadSetup.cpp
+ RISCVLoadStoreOptimizer.cpp
RISCVMachineFunctionInfo.cpp
RISCVMergeBaseOffset.cpp
RISCVOptWInstrs.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f767223f96cd2..2fdf6bd36e88f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2767,6 +2767,42 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
.setMIFlags(MemI.getFlags());
}
+// TODO: At the moment, only the MIPS pairing of instructions operating on a
+// word or double word is supported. This should be extended with more
+// instructions when more vendors support load/store pairing.
+bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::LD:
+ case RISCV::LW:
+ return true;
+ }
+}
+
+bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI) {
+ // If this is a volatile load/store, don't mess with it.
+ if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+ return false;
+
+ if (LdSt.getOperand(1).isFI())
+ return true;
+
+ assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
+ // Can't cluster if the instruction modifies the base register
+ // or is in update form, e.g. ld x5, 8(x5)
+ if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI))
+ return false;
+
+ if (!LdSt.getOperand(2).isImm())
+ return false;
+
+ return true;
+}
+
bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 329ed22c6efe5..656cb38e11297 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -301,6 +301,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
bool isHighLatencyDef(int Opc) const override;
+ /// Return true if the given load or store opcode may be paired with another.
+ static bool isPairableLdStInstOpc(unsigned Opc);
+
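+ /// Return true if it is safe to pair the given load/store: it must have a
+ /// simple base+immediate (or frame-index) address, must not be an ordered
+ /// memory access, and must not modify its own base register.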
+ static bool isLdStSafeToPair(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI);
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
new file mode 100644
index 0000000000000..46fd3158e07ca
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -0,0 +1,403 @@
+//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Load/Store Pairing: It identifies pairs of load or store instructions
+// operating on consecutive memory locations and merges them into a single
+// paired instruction, leveraging hardware support for paired memory accesses.
+// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
+//
+// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
+// merging zero store instructions, promoting loads that read directly from a
+// preceding store, and merging base register updates with load/store
+// instructions (via pre-/post-indexed addressing). These advanced
+// transformations are not yet implemented in the RISC-V pass but represent
+// potential future enhancements for further optimizing RISC-V memory
+// operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-load-store-opt"
+#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
+
+// LdStLimit bounds how many instructions we scan when searching for a
+// load/store pair.
+static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
+ cl::Hidden);
+
+namespace {
+
+struct RISCVLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
+
+ // Find and pair load/store instructions.
+ bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
+ // Convert load/store pairs to single instructions.
+ bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second);
+
+ // Scan the instructions looking for a load/store that can be combined
+ // with the current instruction into a load/store pair.
+ // Return the matching instruction if one is found, else MBB->end().
+ MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
+ bool &MergeForward);
+
+ MachineBasicBlock::iterator
+ mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired, bool MergeForward);
+
+private:
+ AliasAnalysis *AA;
+ MachineRegisterInfo *MRI;
+ const RISCVInstrInfo *TII;
+ const RISCVRegisterInfo *TRI;
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+};
+} // end anonymous namespace
+
+char RISCVLoadStoreOpt::ID = 0;
+INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
+ false)
+
+bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+ const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
+ if (!Subtarget.useLoadStorePairs())
+ return false;
+
+ bool MadeChange = false;
+ TII = Subtarget.getInstrInfo();
+ TRI = Subtarget.getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+
+ for (MachineBasicBlock &MBB : Fn) {
+ LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+ tryToPairLdStInst(MBBI))
+ MadeChange = true;
+ else
+ ++MBBI;
+ }
+ }
+ return MadeChange;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ // If this is volatile, it is not a candidate.
+ if (MI.hasOrderedMemoryRef())
+ return false;
+
+ if (!TII->isLdStSafeToPair(MI, TRI))
+ return false;
+
+ // Look ahead for a pairable instruction.
+ MachineBasicBlock::iterator E = MI.getParent()->end();
+ bool MergeForward;
+ MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
+ if (Paired != E) {
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ return true;
+ }
+ return false;
+}
+
+// Merge two adjacent load/store instructions into a paired instruction
+// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in the case of
+// SWP/LWP and 16-byte aligned in the case of LDP/SDP. This function selects
+// the appropriate paired opcode, verifies that the memory operand is properly
+// aligned, and checks that the offset is valid. If all conditions are met, it
+// builds and inserts the paired instruction.
+bool RISCVLoadStoreOpt::tryConvertToLdStPair(
+ MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+ unsigned PairOpc;
+ Align RequiredAlignment;
+ switch (First->getOpcode()) {
+ default:
+ llvm_unreachable("Unsupported load/store instruction for pairing");
+ case RISCV::SW:
+ PairOpc = RISCV::MIPS_SWP;
+ RequiredAlignment = Align(8);
+ break;
+ case RISCV::LW:
+ PairOpc = RISCV::MIPS_LWP;
+ RequiredAlignment = Align(8);
+ break;
+ case RISCV::SD:
+ PairOpc = RISCV::MIPS_SDP;
+ RequiredAlignment = Align(16);
+ break;
+ case RISCV::LD:
+ PairOpc = RISCV::MIPS_LDP;
+ RequiredAlignment = Align(16);
+ break;
+ }
+
+ MachineFunction *MF = First->getMF();
+ const MachineMemOperand *MMO = *First->memoperands_begin();
+ Align MMOAlign = MMO->getAlign();
+
+ if (MMOAlign < RequiredAlignment)
+ return false;
+
+ int64_t Offset = First->getOperand(2).getImm();
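+ // The paired instructions can only encode a 7-bit unsigned immediate offset.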
+ if (!isUInt<7>(Offset))
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(
+ *MF,
+ First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(),
+ TII->get(PairOpc));
+ MIB.add(First->getOperand(0))
+ .add(Second->getOperand(0))
+ .add(First->getOperand(1))
+ .add(First->getOperand(2))
+ .cloneMergedMemRefs({&*First, &*Second});
+
+ First->getParent()->insert(First, MIB);
+
+ First->removeFromParent();
+ Second->removeFromParent();
+
+ return true;
+}
+
+static bool mayAlias(MachineInstr &MIa,
+ SmallVectorImpl<MachineInstr *> &MemInsns,
+ AliasAnalysis *AA) {
+ for (MachineInstr *MIb : MemInsns)
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+ return true;
+
+ return false;
+}
+
+// Scan the instructions looking for a load/store that can be combined with
+// the current instruction into a load/store pair.
+// TODO: Extend pairing logic to consider reordering both instructions
+// to a safe "middle" position rather than only merging forward/backward.
+// This requires more sophisticated checks for aliasing, register
+// liveness, and potential scheduling hazards.
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
+ bool &MergeForward) {
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = I;
+ MachineInstr &FirstMI = *I;
+ MBBI = next_nodbg(MBBI, E);
+
+ bool MayLoad = FirstMI.mayLoad();
+ Register Reg = FirstMI.getOperand(0).getReg();
+ Register BaseReg = FirstMI.getOperand(1).getReg();
+ int64_t Offset = FirstMI.getOperand(2).getImm();
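+ // Pairable accesses must be exactly one access width apart, so the stride
+ // equals the memory access size (4 bytes for LW/SW, 8 bytes for LD/SD).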
+ int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
+
+ MergeForward = false;
+
+ // Track which register units have been modified and used between the first
+ // insn (inclusive) and the second insn.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+
+ // Remember any instructions that read/write memory between FirstMI and MI.
+ SmallVector<MachineInstr *, 4> MemInsns;
+
+ for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
+ MBBI = next_nodbg(MBBI, E)) {
+ MachineInstr &MI = *MBBI;
+
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
+
+ if (MI.getOpcode() == FirstMI.getOpcode() &&
+ TII->isLdStSafeToPair(MI, TRI)) {
+ Register MIBaseReg = MI.getOperand(1).getReg();
+ int64_t MIOffset = MI.getOperand(2).getImm();
+
+ if (BaseReg == MIBaseReg) {
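+ // If the offsets are not exactly one access width apart, remember this
+ // instruction as a memory access and keep scanning.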
+ if ((Offset != MIOffset + OffsetStride) &&
+ (Offset + OffsetStride != MIOffset)) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+
+ // If the destination register of one load is the same register or a
+ // sub/super register of the other load, bail and keep looking.
+ if (MayLoad &&
+ TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+
+ // If the BaseReg has been modified, then we cannot do the optimization.
+ if (!ModifiedRegUnits.available(BaseReg))
+ return E;
+
+ // If the Rt of the second instruction was not modified or used between
+ // the two instructions and none of the instructions between the second
+ // and first alias with the second, we can combine the second into the
+ // first.
+ if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
+ !(MI.mayLoad() &&
+ !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
+ !mayAlias(MI, MemInsns, AA)) {
+
+ MergeForward = false;
+ return MBBI;
+ }
+
+ // Likewise, if the Rt of the first instruction is not modified or used
+ // between the two instructions and none of the instructions between the
+ // first and the second alias with the first, we can combine the first
+ // into the second.
+ if (!(MayLoad &&
+ !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
+ !mayAlias(FirstMI, MemInsns, AA)) {
+
+ if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
+ MergeForward = true;
+ return MBBI;
+ }
+ }
+ // Unable to combine these instructions due to interference in between.
+ // Keep looking.
+ }
+ }
+
+ // The instruction wasn't a matching load or store. Stop searching if we
+ // encounter a call instruction that might modify memory.
+ if (MI.isCall())
+ return E;
+
+ // Update modified/used register units.
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+ // Otherwise, if the base register is modified, we have no match, so
+ // return early.
+ if (!ModifiedRegUnits.available(BaseReg))
+ return E;
+
+ // Update list of instructions that read/write memory.
+ if (MI.mayLoadOrStore())
+ MemInsns.push_back(&MI);
+ }
+ return E;
+}
+
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ bool MergeForward) {
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+ // If NextI is the second of the two instructions to be merged, we need
+ // to skip one further. Either way, the merge will invalidate the iterator,
+ // and we don't need to scan the new instruction, as it's a pairwise
+ // instruction, which we're not considering for further action anyway.
+ if (NextI == Paired)
+ NextI = next_nodbg(NextI, E);
+
+ // Insert our new paired instruction after whichever of the paired
+ // instructions MergeForward indicates.
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
+ int Offset = I->getOperand(2).getImm();
+ int PairedOffset = Paired->getOperand(2).getImm();
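+ // Order the pair so that the lower-offset access comes first; the XOR
+ // accounts for which of the two instructions is the insertion point.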
+ bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
+
+ if (!MergeForward)
+ Paired->getOperand(1).setIsKill(false);
+
+ // Kill flags may become invalid when moving stores for pairing.
+ if (I->getOperand(0).isUse()) {
+ if (!MergeForward) {
+ // Check if the Paired store's source register has a kill flag and clear
+ // it only if there are intermediate uses between I and Paired.
+ MachineOperand &PairedRegOp = Paired->getOperand(0);
+ if (PairedRegOp.isKill()) {
+ for (auto It = std::next(I); It != Paired; ++It) {
+ if (It->readsRegister(PairedRegOp.getReg(), TRI)) {
+ PairedRegOp.setIsKill(false);
+ break;
+ }
+ }
+ }
+ } else {
+ // Clear kill flags of the first store's register in the forward
+ // direction.
+ Register Reg = I->getOperand(0).getReg();
+ for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
+ MI.clearRegisterKills(Reg, TRI);
+ }
+ }
+
+ MachineInstr *ToInsert = DeletionPoint->removeFromParent();
+ MachineBasicBlock &MBB = *InsertionPoint->getParent();
+ MachineBasicBlock::iterator First, Second;
+
+ if (!InsertAfter) {
+ First = MBB.insert(InsertionPoint, ToInsert);
+ Second = InsertionPoint;
+ } else {
+ Second = MBB.insertAfter(InsertionPoint, ToInsert);
+ First = InsertionPoint;
+ }
+
+ if (tryConvertToLdStPair(First, Second)) {
+ LLVM_DEBUG(dbgs() << "Pairing load/store:\n ");
+ LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+ }
+
+ return NextI;
+}
+
+// Returns an instance of the Load / Store Optimization pass.
+FunctionPass *llvm::createRISCVLoadStoreOptPass() {
+ return new RISCVLoadStoreOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 1b54c278820fc..3c996c82fcec4 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -62,14 +62,14 @@ static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
"riscv-min-jump-table-entries", cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on RISCV"));
-static cl::opt<bool>
- UseMIPSLoadStorePairsOpt("mips-riscv-load-store-pairs",
- cl::desc("RISCV: Optimize for load-store bonding"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool> UseMIPSLoadStorePairsOpt(
+ "use-riscv-mips-load-store-pairs",
+ cl::desc("Enable the load/store pair optimization pass"), cl::init(false),
+ cl::Hidden);
-static cl::opt<bool>
- UseCCMovInsn("riscv-ccmov", cl::desc("RISCV: Use 'mips.ccmov' instruction"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool> UseCCMovInsn("use-riscv-ccmov",
+ cl::desc("Use 'mips.ccmov' instruction"),
+ cl::init(true), cl::Hidden);
void RISCVSubtarget::anchor() {}
@@ -248,6 +248,10 @@ void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
}
}
+bool RISCVSubtarget::useLoadStorePairs() const {
+ return UseMIPSLoadStorePairsOpt && HasVendorXMIPSLSP;
+}
+
bool RISCVSubtarget::useCCMovInsn() const {
return UseCCMovInsn && HasVendorXMIPSCMove;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 52bb10f9ba19b..f78e5f8147d98 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -145,6 +145,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVDAGToDAGISelLegacyPass(*PR);
initializeRISCVMoveMergePass(*PR);
initializeRISCVPushPopOptPass(*PR);
+ initializeRISCVLoadStoreOptPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT,
@@ -551,6 +552,8 @@ void RISCVPassConfig::addPreSched2() {
// Emit KCFI checks for indirect calls.
addPass(createKCFIPass());
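+ // Pair loads/stores after register allocation, before post-RA scheduling.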
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
+ addPass(createRISCVLoadStoreOptPass());
}
void RISCVPassConfig::addPreEmitPass() {
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index b67fbe1b5d3cd..976d1ee003a1f 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -182,6 +182,7 @@
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass
; CHECK-NEXT: Insert KCFI indirect call checks
+; CHECK-NEXT: RISC-V Load / Store Optimizer
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: PostRA Machine Instruction Scheduler
diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll
new file mode 100644
index 0000000000000..4aad0a8a0d05f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32D
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I_PAIR
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+d,+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32D_PAIR
+; RUN: llc -mtriple=riscv64 -mattr=+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I_PAIR
+; RUN: llc -mtriple=riscv64 -mcpu=mips-p8700 -mattr=+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64P_8700
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+d,+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D_PAIR
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D_NOPAIR
+
+define void @testi(ptr %a) {
+; RV32I-LABEL: testi:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset s2, -4
+; RV32I-NEXT: .cfi_offset s3, -8
+; RV32I-NEXT: .cfi_offset s4, -12
+; RV32I-NEXT: .cfi_offset s5, -16
+; RV32I-NEXT: lw s3, 0(a0)
+; RV32I-NEXT: lw s2, 4(a0)
+; RV32I-NEXT: lw s5, 8(a0)
+; RV32I-NEXT: lw s4, 12(a0)
+; RV32I-NEXT: #APP
+; RV32I-NEXT: #NO_APP
+; RV32I-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV32D-LABEL: testi:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: addi sp, sp, -16
+; RV32D-NEXT: .cfi_def_cfa_offset 16
+; RV32D-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32D-NEXT: .cfi_offset s2, -4
+; RV32D-NEXT: .cfi_offset s3, -8
+; RV32D-NEXT: .cfi_offset s4, -12
+; RV32D-NEXT: .cfi_offset s5, -16
+; RV32D-NEXT: lw s3, 0(a0)
+; RV32D-NEXT: lw s2, 4(a0)
+; RV32D-NEXT: lw s5, 8(a0)
+; RV32D-NEXT: lw s4, 12(a0)
+; RV32D-NEXT: #APP
+; RV32D-NEXT: #NO_APP
+; RV32D-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32D-NEXT: .cfi_restore s2
+; RV32D-NEXT: .cfi_restore s3
+; RV32D-NEXT: .cfi_restore s4
+; RV32D-NEXT: .cfi_restore s5
+; RV32D-NEXT: addi sp, sp, 16
+; RV32D-NEXT: .cfi_def_cfa_offset 0
+; RV32D-NEXT: ret
+;
+; RV64I-LABEL: testi:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset s2, -8
+; RV64I-NEXT: .cfi_offset s3, -16
+; RV64I-NEXT: .cfi_offset s4, -24
+; RV64I-NEXT: .cfi_offset s5, -32
+; RV64I-NEXT: ld s3, 0(a0)
+; RV64I-NEXT: ld s2, 8(a0)
+; RV64I-NEXT: ld s5, 16(a0)
+; RV64I-NEXT: ld s4, 24(a0)
+; RV64I-NEXT: #APP
+; RV64I-NEXT: #NO_APP
+; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: .cfi_restore s4
+; RV64I-NEXT: .cfi_restore s5
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: testi:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: addi sp, sp, -32
+; RV64D-NEXT: .cfi_def_cfa_offset 32
+; RV64D-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D-NEXT: .cfi_offset s2, -8
+; RV64D-NEXT: .cfi_offset s3, -16
+; RV64D-NEXT: .cfi_offset s4, -24
+; RV64D-NEXT: .cfi_offset s5, -32
+; RV64D-NEXT: ld s3, 0(a0)
+; RV64D-NEXT: ld s2, 8(a0)
+; RV64D-NEXT: ld s5, 16(a0)
+; RV64D-NEXT: ld s4, 24(a0)
+; RV64D-NEXT: #APP
+; RV64D-NEXT: #NO_APP
+; RV64D-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D-NEXT: .cfi_restore s2
+; RV64D-NEXT: .cfi_restore s3
+; RV64D-NEXT: .cfi_restore s4
+; RV64D-NEXT: .cfi_restore s5
+; RV64D-NEXT: addi sp, sp, 32
+; RV64D-NEXT: .cfi_def_cfa_offset 0
+; RV64D-NEXT: ret
+;
+; RV32I_PAIR-LABEL: testi:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: addi sp, sp, -16
+; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 16
+; RV32I_PAIR-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32I_PAIR-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32I_PAIR-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32I_PAIR-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32I_PAIR-NEXT: .cfi_offset s2, -4
+; RV32I_PAIR-NEXT: .cfi_offset s3, -8
+; RV32I_PAIR-NEXT: .cfi_offset s4, -12
+; RV32I_PAIR-NEXT: .cfi_offset s5, -16
+; RV32I_PAIR-NEXT: mips.lwp s3, s2, 0(a0)
+; RV32I_PAIR-NEXT: mips.lwp s5, s4, 8(a0)
+; RV32I_PAIR-NEXT: #APP
+; RV32I_PAIR-NEXT: #NO_APP
+; RV32I_PAIR-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32I_PAIR-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32I_PAIR-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32I_PAIR-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I_PAIR-NEXT: .cfi_restore s2
+; RV32I_PAIR-NEXT: .cfi_restore s3
+; RV32I_PAIR-NEXT: .cfi_restore s4
+; RV32I_PAIR-NEXT: .cfi_restore s5
+; RV32I_PAIR-NEXT: addi sp, sp, 16
+; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV32I_PAIR-NEXT: ret
+;
+; RV32D_PAIR-LABEL: testi:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: addi sp, sp, -16
+; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 16
+; RV32D_PAIR-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32D_PAIR-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32D_PAIR-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32D_PAIR-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32D_PAIR-NEXT: .cfi_offset s2, -4
+; RV32D_PAIR-NEXT: .cfi_offset s3, -8
+; RV32D_PAIR-NEXT: .cfi_offset s4, -12
+; RV32D_PAIR-NEXT: .cfi_offset s5, -16
+; RV32D_PAIR-NEXT: mips.lwp s3, s2, 0(a0)
+; RV32D_PAIR-NEXT: mips.lwp s5, s4, 8(a0)
+; RV32D_PAIR-NEXT: #APP
+; RV32D_PAIR-NEXT: #NO_APP
+; RV32D_PAIR-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32D_PAIR-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32D_PAIR-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32D_PAIR-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32D_PAIR-NEXT: .cfi_restore s2
+; RV32D_PAIR-NEXT: .cfi_restore s3
+; RV32D_PAIR-NEXT: .cfi_restore s4
+; RV32D_PAIR-NEXT: .cfi_restore s5
+; RV32D_PAIR-NEXT: addi sp, sp, 16
+; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV32D_PAIR-NEXT: ret
+;
+; RV64I_PAIR-LABEL: testi:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: addi sp, sp, -32
+; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64I_PAIR-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I_PAIR-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I_PAIR-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64I_PAIR-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I_PAIR-NEXT: .cfi_offset s2, -8
+; RV64I_PAIR-NEXT: .cfi_offset s3, -16
+; RV64I_PAIR-NEXT: .cfi_offset s4, -24
+; RV64I_PAIR-NEXT: .cfi_offset s5, -32
+; RV64I_PAIR-NEXT: mips.ldp s3, s2, 0(a0)
+; RV64I_PAIR-NEXT: ld s5, 16(a0)
+; RV64I_PAIR-NEXT: ld s4, 24(a0)
+; RV64I_PAIR-NEXT: #APP
+; RV64I_PAIR-NEXT: #NO_APP
+; RV64I_PAIR-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I_PAIR-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I_PAIR-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64I_PAIR-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I_PAIR-NEXT: .cfi_restore s2
+; RV64I_PAIR-NEXT: .cfi_restore s3
+; RV64I_PAIR-NEXT: .cfi_restore s4
+; RV64I_PAIR-NEXT: .cfi_restore s5
+; RV64I_PAIR-NEXT: addi sp, sp, 32
+; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64I_PAIR-NEXT: ret
+;
+; RV64P_8700-LABEL: testi:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: addi sp, sp, -32
+; RV64P_8700-NEXT: .cfi_def_cfa_offset 32
+; RV64P_8700-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64P_8700-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64P_8700-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64P_8700-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64P_8700-NEXT: .cfi_offset s2, -8
+; RV64P_8700-NEXT: .cfi_offset s3, -16
+; RV64P_8700-NEXT: .cfi_offset s4, -24
+; RV64P_8700-NEXT: .cfi_offset s5, -32
+; RV64P_8700-NEXT: mips.ldp s3, s2, 0(a0)
+; RV64P_8700-NEXT: ld s5, 16(a0)
+; RV64P_8700-NEXT: ld s4, 24(a0)
+; RV64P_8700-NEXT: #APP
+; RV64P_8700-NEXT: #NO_APP
+; RV64P_8700-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64P_8700-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64P_8700-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64P_8700-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64P_8700-NEXT: .cfi_restore s2
+; RV64P_8700-NEXT: .cfi_restore s3
+; RV64P_8700-NEXT: .cfi_restore s4
+; RV64P_8700-NEXT: .cfi_restore s5
+; RV64P_8700-NEXT: addi sp, sp, 32
+; RV64P_8700-NEXT: .cfi_def_cfa_offset 0
+; RV64P_8700-NEXT: ret
+;
+; RV64D_PAIR-LABEL: testi:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: addi sp, sp, -32
+; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64D_PAIR-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D_PAIR-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D_PAIR-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D_PAIR-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D_PAIR-NEXT: .cfi_offset s2, -8
+; RV64D_PAIR-NEXT: .cfi_offset s3, -16
+; RV64D_PAIR-NEXT: .cfi_offset s4, -24
+; RV64D_PAIR-NEXT: .cfi_offset s5, -32
+; RV64D_PAIR-NEXT: mips.ldp s3, s2, 0(a0)
+; RV64D_PAIR-NEXT: ld s5, 16(a0)
+; RV64D_PAIR-NEXT: ld s4, 24(a0)
+; RV64D_PAIR-NEXT: #APP
+; RV64D_PAIR-NEXT: #NO_APP
+; RV64D_PAIR-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D_PAIR-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D_PAIR-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D_PAIR-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D_PAIR-NEXT: .cfi_restore s2
+; RV64D_PAIR-NEXT: .cfi_restore s3
+; RV64D_PAIR-NEXT: .cfi_restore s4
+; RV64D_PAIR-NEXT: .cfi_restore s5
+; RV64D_PAIR-NEXT: addi sp, sp, 32
+; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64D_PAIR-NEXT: ret
+;
+; RV64D_NOPAIR-LABEL: testi:
+; RV64D_NOPAIR: # %bb.0: # %entry
+; RV64D_NOPAIR-NEXT: addi sp, sp, -32
+; RV64D_NOPAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64D_NOPAIR-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D_NOPAIR-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D_NOPAIR-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D_NOPAIR-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D_NOPAIR-NEXT: .cfi_offset s2, -8
+; RV64D_NOPAIR-NEXT: .cfi_offset s3, -16
+; RV64D_NOPAIR-NEXT: .cfi_offset s4, -24
+; RV64D_NOPAIR-NEXT: .cfi_offset s5, -32
+; RV64D_NOPAIR-NEXT: ld s3, 0(a0)
+; RV64D_NOPAIR-NEXT: ld s2, 8(a0)
+; RV64D_NOPAIR-NEXT: ld s5, 16(a0)
+; RV64D_NOPAIR-NEXT: ld s4, 24(a0)
+; RV64D_NOPAIR-NEXT: #APP
+; RV64D_NOPAIR-NEXT: #NO_APP
+; RV64D_NOPAIR-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D_NOPAIR-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D_NOPAIR-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D_NOPAIR-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D_NOPAIR-NEXT: .cfi_restore s2
+; RV64D_NOPAIR-NEXT: .cfi_restore s3
+; RV64D_NOPAIR-NEXT: .cfi_restore s4
+; RV64D_NOPAIR-NEXT: .cfi_restore s5
+; RV64D_NOPAIR-NEXT: addi sp, sp, 32
+; RV64D_NOPAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64D_NOPAIR-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 1
+ %0 = load ptr, ptr %arrayidx, align 16
+ %1 = load ptr, ptr %a, align 16
+ %arrayidx2 = getelementptr inbounds ptr, ptr %a, i64 3
+ %2 = load ptr, ptr %arrayidx2, align 16
+ %arrayidx3 = getelementptr inbounds ptr, ptr %a, i64 2
+ %3 = load ptr, ptr %arrayidx3, align 8
+ tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(ptr %0, ptr %1, ptr %2, ptr %3)
+ ret void
+}