[llvm] 4f9d5a8 - [RISCV] Generate Xqcilsm LWMI/SWMI load/store multiple instructions (#171079)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 21:34:07 PST 2025
Author: Sudharsan Veeravalli
Date: 2025-12-11T11:04:04+05:30
New Revision: 4f9d5a8bc85431b722e6f90744f3683adffc17b4
URL: https://github.com/llvm/llvm-project/commit/4f9d5a8bc85431b722e6f90744f3683adffc17b4
DIFF: https://github.com/llvm/llvm-project/commit/4f9d5a8bc85431b722e6f90744f3683adffc17b4.diff
LOG: [RISCV] Generate Xqcilsm LWMI/SWMI load/store multiple instructions (#171079)
This patch adds support for generating the Xqcilsm load/store multiple
instructions as a part of the RISCVLoadStoreOptimizer pass. For now we
only combine two load/store instructions into a load/store multiple.
Support for converting more loads/stores will be added in follow-up
patches. These instructions are only applicable for 32-bit loads/stores
with an alignment of 4-bytes.
Added:
llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir
Modified:
llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index a22ab6bfc04b8..f1827dcf174f3 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -70,6 +70,12 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
// Convert load/store pairs to single instructions.
bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Second);
+ bool tryConvertToXqcilsmLdStPair(MachineFunction *MF,
+ MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second);
+ bool tryConvertToMIPSLdStPair(MachineFunction *MF,
+ MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second);
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
@@ -114,7 +120,7 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
- if (Subtarget.useMIPSLoadStorePairs()) {
+ if (Subtarget.useMIPSLoadStorePairs() || Subtarget.hasVendorXqcilsm()) {
for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
@@ -168,14 +174,93 @@ bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
return false;
}
-// Merge two adjacent load/store instructions into a paired instruction
-// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of
-// SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the
-// appropriate paired opcode, verifies that the memory operand is properly
-// aligned, and checks that the offset is valid. If all conditions are met, it
-// builds and inserts the paired instruction.
-bool RISCVLoadStoreOpt::tryConvertToLdStPair(
- MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+bool RISCVLoadStoreOpt::tryConvertToXqcilsmLdStPair(
+ MachineFunction *MF, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second) {
+ unsigned Opc = First->getOpcode();
+ if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc)
+ return false;
+
+ const auto &FirstOp1 = First->getOperand(1);
+ const auto &SecondOp1 = Second->getOperand(1);
+ const auto &FirstOp2 = First->getOperand(2);
+ const auto &SecondOp2 = Second->getOperand(2);
+
+ // Require simple reg+imm addressing for both.
+ if (!FirstOp1.isReg() || !SecondOp1.isReg() || !FirstOp2.isImm() ||
+ !SecondOp2.isImm())
+ return false;
+
+ Register Base1 = FirstOp1.getReg();
+ Register Base2 = SecondOp1.getReg();
+
+ if (Base1 != Base2)
+ return false;
+
+ const MachineMemOperand *MMO = *First->memoperands_begin();
+ Align MMOAlign = MMO->getAlign();
+
+ if (MMOAlign < Align(4))
+ return false;
+
+ auto &FirstOp0 = First->getOperand(0);
+ auto &SecondOp0 = Second->getOperand(0);
+
+ int64_t Off1 = FirstOp2.getImm();
+ int64_t Off2 = SecondOp2.getImm();
+
+ if (Off2 < Off1) {
+ std::swap(FirstOp0, SecondOp0);
+ std::swap(Off1, Off2);
+ }
+
+ Register StartReg = FirstOp0.getReg();
+ Register NextReg = SecondOp0.getReg();
+
+ if (StartReg == RISCV::X0 || NextReg == RISCV::X0)
+ return false;
+
+ // If the base reg gets overwritten by one of the loads then bail out.
+ if (Opc == RISCV::LW && (StartReg == Base1 || NextReg == Base1))
+ return false;
+
+ if (!isShiftedUInt<5, 2>(Off1) || (Off2 - Off1 != 4))
+ return false;
+
+ if (NextReg != StartReg + 1)
+ return false;
+
+ unsigned XqciOpc = (Opc == RISCV::LW) ? RISCV::QC_LWMI : RISCV::QC_SWMI;
+
+ auto StartRegState = (Opc == RISCV::LW) ? RegState::Define
+ : getKillRegState(FirstOp0.isKill());
+ auto NextRegState =
+ (Opc == RISCV::LW)
+ ? RegState::ImplicitDefine
+ : (RegState::Implicit | getKillRegState(SecondOp0.isKill()));
+
+ DebugLoc DL =
+ First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc));
+ MIB.addReg(StartReg, StartRegState)
+ .addReg(Base1, getKillRegState(FirstOp1.isKill() || SecondOp1.isKill()))
+ .addImm(2)
+ .addImm(Off1)
+ .cloneMergedMemRefs({&*First, &*Second})
+ .addReg(NextReg, NextRegState);
+
+ First->getParent()->insert(First, MIB);
+ First->removeFromParent();
+ Second->removeFromParent();
+
+ return true;
+}
+
+bool RISCVLoadStoreOpt::tryConvertToMIPSLdStPair(
+ MachineFunction *MF, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second) {
+ // Try converting to SWP/LWP/LDP/SDP.
+ // SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment.
unsigned PairOpc;
Align RequiredAlignment;
switch (First->getOpcode()) {
@@ -199,7 +284,6 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair(
break;
}
- MachineFunction *MF = First->getMF();
const MachineMemOperand *MMO = *First->memoperands_begin();
Align MMOAlign = MMO->getAlign();
@@ -227,6 +311,24 @@ bool RISCVLoadStoreOpt::tryConvertToLdStPair(
return true;
}
+// Merge two adjacent load/store instructions into a paired instruction.
+// This function calls the vendor specific implementation that selects the
+// appropriate paired opcode, verifies that the memory operand is properly
+// aligned, and checks that the offset is valid. If all conditions are met, it
+// builds and inserts the paired instruction.
+bool RISCVLoadStoreOpt::tryConvertToLdStPair(
+ MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+ MachineFunction *MF = First->getMF();
+ const RISCVSubtarget &STI = MF->getSubtarget<RISCVSubtarget>();
+
+ // Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled.
+ if (!STI.is64Bit() && STI.hasVendorXqcilsm())
+ return tryConvertToXqcilsmLdStPair(MF, First, Second);
+
+ // Else try to convert them into MIPS Paired Loads/Stores.
+ return tryConvertToMIPSLdStPair(MF, First, Second);
+}
+
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir
new file mode 100644
index 0000000000000..396f67326a7ca
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xqcilsm-lwmi-swmi.mir
@@ -0,0 +1,315 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcilsm -run-pass=riscv-load-store-opt %s -o - | FileCheck %s
+
+--- |
+
+ define void @pair_two_lw_into_qc_lwmi() nounwind { ret void }
+ define void @pair_two_lw_into_qc_lwmi_reversed() nounwind { ret void }
+ define void @pair_two_sw_into_qc_swmi_reversed() nounwind { ret void }
+ define void @no_pair_if_different_base_regs() nounwind { ret void }
+ define void @no_pair_if_alignment_lt_4() nounwind { ret void }
+ define void @pair_two_sw_into_qc_swmi() nounwind { ret void }
+ define void @no_pair_if_misaligned() nounwind { ret void }
+ define void @pair_at_upper_boundary_lw() nounwind { ret void }
+ define void @pair_at_upper_boundary_sw() nounwind { ret void }
+ define void @no_pair_if_offset_out_of_range_lw() nounwind { ret void }
+ define void @no_pair_if_offset_out_of_range_sw() nounwind { ret void }
+ define void @no_pair_if_non_consecutive_regs() nounwind { ret void }
+ define void @no_pair_if_rd_is_x0() nounwind { ret void }
+ define void @no_pair_if_lw_rd_equals_base() nounwind { ret void }
+ define void @pair_if_not_adjacent() nounwind { ret void }
+ define void @pair_if_not_adjacent_use() nounwind { ret void }
+ define void @no_pair_if_not_adjacent_use() nounwind { ret void }
+---
+name: pair_two_lw_into_qc_lwmi
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 0 :: (load (s32), align 4)
+ $x13 = LW $x10, 4 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+# FIXME: Kill flags are not propagated correctly for the base register
+name: pair_two_lw_into_qc_lwmi_reversed
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_two_lw_into_qc_lwmi_reversed
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 0, implicit-def $x13 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x13 = LW $x10, 4 :: (load (s32))
+ $x12 = LW killed $x10, 0 :: (load (s32))
+ PseudoRET
+
+...
+---
+name: pair_two_sw_into_qc_swmi_reversed
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi_reversed
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: QC_SWMI killed $x12, $x10, 2, 0, implicit killed $x13 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW killed $x13, $x10, 4 :: (store (s32))
+ SW killed $x12, $x10, 0 :: (store (s32))
+ PseudoRET
+
+...
+---
+name: no_pair_if_different_base_regs
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x11
+ ; CHECK-LABEL: name: no_pair_if_different_base_regs
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x11, 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 0 :: (load (s32))
+ $x13 = LW $x11, 4 :: (load (s32))
+ PseudoRET
+
+...
+---
+name: no_pair_if_alignment_lt_4
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_alignment_lt_4
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 3 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 0 :: (load (s32))
+ $x13 = LW $x10, 3 :: (load (s32))
+ PseudoRET
+
+...
+---
+name: pair_two_sw_into_qc_swmi
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: pair_two_sw_into_qc_swmi
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: QC_SWMI killed $x12, $x10, 2, 0, implicit killed $x13 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW killed $x12, $x10, 0 :: (store (s32), align 4)
+ SW killed $x13, $x10, 4 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_misaligned
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_misaligned
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 2 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 6 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 2 :: (load (s32), align 4)
+ $x13 = LW $x10, 6 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+# FIXME: Kill flags are not propagated correctly for the base register
+name: pair_at_upper_boundary_lw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_at_upper_boundary_lw
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = QC_LWMI $x10, 2, 124, implicit-def $x13 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 124 :: (load (s32), align 4)
+ $x13 = LW killed $x10, 128 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+# FIXME: Kill flags are not propagated correctly for the base register
+name: pair_at_upper_boundary_sw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: pair_at_upper_boundary_sw
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: QC_SWMI $x12, $x10, 2, 124, implicit $x13 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW $x12, $x10, 124 :: (store (s32), align 4)
+ SW $x13, killed $x10, 128 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_offset_out_of_range_lw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_lw
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x12 = LW $x10, 128 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 132 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x12 = LW $x10, 128 :: (load (s32), align 4)
+ $x13 = LW $x10, 132 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_offset_out_of_range_sw
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x12, $x13
+ ; CHECK-LABEL: name: no_pair_if_offset_out_of_range_sw
+ ; CHECK: liveins: $x10, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW $x12, $x10, 128 :: (store (s32))
+ ; CHECK-NEXT: SW $x13, $x10, 132 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW $x12, $x10, 128 :: (store (s32), align 4)
+ SW $x13, $x10, 132 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_non_consecutive_regs
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_non_consecutive_regs
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x11 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x13 = LW $x10, 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x11 = LW $x10, 0 :: (load (s32), align 4)
+ $x13 = LW $x10, 4 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_rd_is_x0
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_rd_is_x0
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x0 = LW $x10, 0 :: (load (s32))
+ ; CHECK-NEXT: $x1 = LW $x10, 4 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x0 = LW $x10, 0 :: (load (s32), align 4)
+ $x1 = LW $x10, 4 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_lw_rd_equals_base
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: no_pair_if_lw_rd_equals_base
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x10 = LW $x10, 20 :: (load (s32))
+ ; CHECK-NEXT: $x11 = LW $x10, 24 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x10 = LW $x10, 20 :: (load (s32), align 4)
+ $x11 = LW $x10, 24 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+# FIXME: Kill flags are not propagated correctly for the base register
+name: pair_if_not_adjacent
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: pair_if_not_adjacent
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x1 = QC_LWMI $x10, 2, 20, implicit-def $x2 :: (load (s32))
+ ; CHECK-NEXT: $x3 = ADDI $x1, 10
+ ; CHECK-NEXT: PseudoRET
+ $x1 = LW $x10, 20 :: (load (s32), align 4)
+ $x3 = ADDI $x1, 10
+ $x2 = LW killed $x10, 24 :: (load (s32), align 4)
+ PseudoRET
+
+...
+---
+name: pair_if_not_adjacent_use
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x1, $x2
+ ; CHECK-LABEL: name: pair_if_not_adjacent_use
+ ; CHECK: liveins: $x10, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x2 = ADDI $x2, 10
+ ; CHECK-NEXT: QC_SWMI $x1, $x10, 2, 20, implicit $x2 :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ SW $x1, $x10, 20 :: (store (s32), align 4)
+ $x2 = ADDI $x2, 10
+ SW $x2, $x10, 24 :: (store (s32), align 4)
+ PseudoRET
+
+...
+---
+name: no_pair_if_not_adjacent_use
+tracksRegLiveness: false
+body: |
+ bb.0:
+ liveins: $x10, $x2
+ ; CHECK-LABEL: name: no_pair_if_not_adjacent_use
+ ; CHECK: liveins: $x10, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x1 = LW $x10, 20 :: (load (s32))
+ ; CHECK-NEXT: $x1 = ADDI $x1, 10
+ ; CHECK-NEXT: SW $x2, $x10, 40 :: (store (s32))
+ ; CHECK-NEXT: $x2 = LW $x10, 24 :: (load (s32))
+ ; CHECK-NEXT: PseudoRET
+ $x1 = LW $x10, 20 :: (load (s32), align 4)
+ $x1 = ADDI $x1, 10
+ SW $x2, $x10, 40 :: (store (s32), align 4)
+ $x2 = LW $x10, 24 :: (load (s32), align 4)
+ PseudoRET
+
+...
More information about the llvm-commits
mailing list