[llvm] [RISCV][VLOPT] Support segmented store instructions (PR #155467)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 12:00:26 PDT 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/155467
>From 5df264046fedd0e543b9499f286d14b098ef87d3 Mon Sep 17 00:00:00 2001
From: Min Hsu <min at myhsu.dev>
Date: Tue, 26 Aug 2025 10:40:13 -0700
Subject: [PATCH 1/9] [RISCV][VLOPT] Support segmented store instructions
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 93 +++++++-
llvm/test/CodeGen/RISCV/rvv/pr141907.ll | 2 +-
.../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 216 ++++++++++++++++++
3 files changed, 306 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index b7c93ffea41f7..c57b08172c821 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -178,6 +178,19 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
return Log2EEW;
}
+#define VSEG_CASES(Prefix, EEW) \
+ RISCV::Prefix##SEG2E##EEW##_V: \
+ case RISCV::Prefix##SEG3E##EEW##_V: \
+ case RISCV::Prefix##SEG4E##EEW##_V: \
+ case RISCV::Prefix##SEG5E##EEW##_V: \
+ case RISCV::Prefix##SEG6E##EEW##_V: \
+ case RISCV::Prefix##SEG7E##EEW##_V: \
+ case RISCV::Prefix##SEG8E##EEW##_V
+#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
+#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
+#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
+#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
+
static std::optional<unsigned>
getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
const MachineInstr &MI = *MO.getParent();
@@ -225,21 +238,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VSE8_V:
case RISCV::VLSE8_V:
case RISCV::VSSE8_V:
+ case VSSEG_CASES(8):
+ case VSSSEG_CASES(8):
return 3;
case RISCV::VLE16_V:
case RISCV::VSE16_V:
case RISCV::VLSE16_V:
case RISCV::VSSE16_V:
+ case VSSEG_CASES(16):
+ case VSSSEG_CASES(16):
return 4;
case RISCV::VLE32_V:
case RISCV::VSE32_V:
case RISCV::VLSE32_V:
case RISCV::VSSE32_V:
+ case VSSEG_CASES(32):
+ case VSSSEG_CASES(32):
return 5;
case RISCV::VLE64_V:
case RISCV::VSE64_V:
case RISCV::VLSE64_V:
case RISCV::VSSE64_V:
+ case VSSEG_CASES(64):
+ case VSSSEG_CASES(64):
return 6;
// Vector Indexed Instructions
@@ -248,7 +269,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI8_V:
case RISCV::VLOXEI8_V:
case RISCV::VSUXEI8_V:
- case RISCV::VSOXEI8_V: {
+ case RISCV::VSOXEI8_V:
+ case VSUXSEG_CASES(8):
+ case VSOXSEG_CASES(8): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 3;
@@ -256,7 +279,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI16_V:
case RISCV::VLOXEI16_V:
case RISCV::VSUXEI16_V:
- case RISCV::VSOXEI16_V: {
+ case RISCV::VSOXEI16_V:
+ case VSUXSEG_CASES(16):
+ case VSOXSEG_CASES(16): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 4;
@@ -264,7 +289,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI32_V:
case RISCV::VLOXEI32_V:
case RISCV::VSUXEI32_V:
- case RISCV::VSOXEI32_V: {
+ case RISCV::VSOXEI32_V:
+ case VSUXSEG_CASES(32):
+ case VSOXSEG_CASES(32): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 5;
@@ -272,7 +299,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI64_V:
case RISCV::VLOXEI64_V:
case RISCV::VSUXEI64_V:
- case RISCV::VSOXEI64_V: {
+ case RISCV::VSOXEI64_V:
+ case VSUXSEG_CASES(64):
+ case VSOXSEG_CASES(64): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 6;
@@ -1375,6 +1404,55 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
return VLOp;
}
+/// Return true if MI is an instruction used to assemble registers for
+/// segmented store instructions, i.e. RISCVISD::TUPLE_INSERT, which is
+/// currently lowered to INSERT_SUBREG.
+static bool isTupleInsertInstr(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (MI.getOpcode() != RISCV::INSERT_SUBREG)
+ return false;
+
+ const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+ // Check whether it was lowered with the correct subreg index.
+ [[maybe_unused]] const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ [[maybe_unused]] unsigned SubRegIdx = MI.getOperand(3).getImm();
+ switch (DstRC->getID()) {
+ case RISCV::VRN2M1RegClassID:
+ case RISCV::VRN2M1NoV0RegClassID:
+ case RISCV::VRN3M1RegClassID:
+ case RISCV::VRN3M1NoV0RegClassID:
+ case RISCV::VRN4M1RegClassID:
+ case RISCV::VRN4M1NoV0RegClassID:
+ case RISCV::VRN5M1RegClassID:
+ case RISCV::VRN5M1NoV0RegClassID:
+ case RISCV::VRN6M1RegClassID:
+ case RISCV::VRN6M1NoV0RegClassID:
+ case RISCV::VRN7M1RegClassID:
+ case RISCV::VRN7M1NoV0RegClassID:
+ case RISCV::VRN8M1RegClassID:
+ case RISCV::VRN8M1NoV0RegClassID:
+ assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock &&
+ "unexpected subreg index for VRM1 sub-register");
+ return true;
+ case RISCV::VRN2M2RegClassID:
+ case RISCV::VRN2M2NoV0RegClassID:
+ case RISCV::VRN3M2RegClassID:
+ case RISCV::VRN3M2NoV0RegClassID:
+ case RISCV::VRN4M2RegClassID:
+ case RISCV::VRN4M2NoV0RegClassID:
+ assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 2 &&
+ "unexpected subreg index for VRM2 sub-register");
+ return true;
+ case RISCV::VRN2M4RegClassID:
+ case RISCV::VRN2M4NoV0RegClassID:
+ assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 4 &&
+ "unexpected subreg index for VRM4 sub-register");
+ return true;
+ default:
+ return false;
+ }
+}
+
std::optional<MachineOperand>
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
std::optional<MachineOperand> CommonVL;
@@ -1395,6 +1473,13 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
continue;
}
+ if (isTupleInsertInstr(UserMI, *MRI)) {
+ LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
+ Worklist.insert_range(llvm::make_pointer_range(
+ MRI->use_operands(UserMI.getOperand(0).getReg())));
+ continue;
+ }
+
if (UserMI.isPHI()) {
// Don't follow PHI cycles
if (!PHISeen.insert(&UserMI).second)
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
index f93f88a5bc06c..1f485ea348396 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -12,7 +12,7 @@ define void @pr141907(ptr %0) nounwind {
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: addi a3, sp, 20
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 4e428fd1a6cea..e6d2f133ed7fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -2192,3 +2192,219 @@ body: |
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
%y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, %x, 1, 5 /* e32 */, 0
$v8 = COPY %y
+...
+---
+name: vsseg3e32_v
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vsseg3e32_v
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSSEG3E32_V_M1 killed [[INSERT_SUBREG2]], $noreg, 1, 5 /* e32 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSSEG3E32_V_M1 killed %8, $noreg, 1, 5 /* e32 */
+...
+---
+name: vsseg3e64_v_incompatible_eew
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vsseg3e64_v_incompatible_eew
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSSEG3E64_V_M1 killed [[INSERT_SUBREG2]], $noreg, 1, 6 /* e64 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSSEG3E64_V_M1 killed %8, $noreg, 1, 6 /* e64 */
+...
+---
+name: vsseg3e32_v_incompatible_emul
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vsseg3e32_v_incompatible_emul
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSSEG3E32_V_M1 killed [[INSERT_SUBREG2]], $noreg, 1, 6 /* e64 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSSEG3E32_V_M1 killed %8, $noreg, 1, 6 /* e64 */
+...
+---
+name: vssseg3e32_v
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vssseg3e32_v
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSSSEG3E32_V_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSSSEG3E32_V_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */
+...
+---
+name: vsuxseg3ei64_v
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vsuxseg3ei64_v
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSUXSEG3EI64_V_M2_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSUXSEG3EI64_V_M2_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */
+...
+---
+name: vsuxseg3ei64_v_incompatible_data_eew
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vsuxseg3ei64_v_incompatible_data_eew
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 6 /* e64 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSUXSEG3EI64_V_M2_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 6 /* e64 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSUXSEG3EI64_V_M2_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */
+...
+---
+name: vsuxseg3ei32_v_index
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: vsuxseg3ei32_v_index
+ ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, [[PseudoVADD_VV_M1_]], 1, 6 /* e64 */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, %2, 1, 6 /* e64 */
+...
+---
+name: vsuxseg3ei32_v_incompatible_index_eew
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: vsuxseg3ei32_v_incompatible_index_eew
+ ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */
+ ; CHECK-NEXT: PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, [[PseudoVADD_VV_M1_]], 1, 6 /* e64 */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */
+ PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, %2, 1, 6 /* e64 */
+...
+---
+name: vsoxseg3ei64_v
+body: |
+ bb.0:
+ liveins: $v8
+
+ ; CHECK-LABEL: name: vsoxseg3ei64_v
+ ; CHECK: liveins: $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: PseudoVSOXSEG3EI64_V_M2_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */
+ %0:vr = COPY $v8
+ %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ PseudoVSOXSEG3EI64_V_M2_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */
+...
+---
+name: vsoxseg3ei32_v_index
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: vsoxseg3ei32_v_index
+ ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: PseudoVSOXSEG3EI32_V_M1_M2 $noreg, $noreg, [[PseudoVADD_VV_M1_]], 1, 6 /* e64 */
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ PseudoVSOXSEG3EI32_V_M1_M2 $noreg, $noreg, %2, 1, 6 /* e64 */
+...
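For readers skimming the first patch: a hand-expanded example of one of the new case-label macros (a sketch derived from the VSEG_CASES definition above, not part of the diff). Writing "case VSSEG_CASES(8):" in getOperandLog2EEW expands to the seven unit-stride segment-store opcodes for EEW=8, one per segment count NF = 2..8:

    case RISCV::VSSEG2E8_V:
    case RISCV::VSSEG3E8_V:
    case RISCV::VSSEG4E8_V:
    case RISCV::VSSEG5E8_V:
    case RISCV::VSSEG6E8_V:
    case RISCV::VSSEG7E8_V:
    case RISCV::VSSEG8E8_V:

VSSSEG_CASES, VSUXSEG_CASES and VSOXSEG_CASES enumerate the strided and indexed segment stores in the same way (the indexed variants paste in an extra "I", giving names such as VSUXSEG2EI8_V).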
>From 9b74e9eeb0de0f242ec5cfde717b093ca2e47d80 Mon Sep 17 00:00:00 2001
From: Min Hsu <min at myhsu.dev>
Date: Tue, 26 Aug 2025 12:00:12 -0700
Subject: [PATCH 2/9] fixup! Update more test cases
---
.../test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 63fd892058811..d394df954cbda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -383,14 +383,13 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wx v13, v10, a2
; RV32-NEXT: vnsrl.wi v12, v10, 0
-; RV32-NEXT: vmv.x.s a2, v10
+; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
-; RV32-NEXT: mv a0, a2
+; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
>From de5ed1a3a7b5fd8d69b9b8ba4043e601ccfa54b5 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 2 Sep 2025 11:55:49 -0700
Subject: [PATCH 3/9] fixup! Filter out some of the INSERT_SUBREG users
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 37 ++++++++++++++++++++--
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 32 +++++++++++++++++++
2 files changed, 67 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index c57b08172c821..9cf769dd08c52 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1453,6 +1453,34 @@ static bool isTupleInsertInstr(const MachineInstr &MI,
}
}
+static bool isSegmentedStoreInstr(const MachineInstr &MI) {
+ const RISCVVPseudosTable::PseudoInfo *RVV =
+ RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+ if (!RVV)
+ return false;
+ switch (RVV->BaseInstr) {
+ case VSSEG_CASES(8):
+ case VSSSEG_CASES(8):
+ case VSUXSEG_CASES(8):
+ case VSOXSEG_CASES(8):
+ case VSSEG_CASES(16):
+ case VSSSEG_CASES(16):
+ case VSUXSEG_CASES(16):
+ case VSOXSEG_CASES(16):
+ case VSSEG_CASES(32):
+ case VSSSEG_CASES(32):
+ case VSUXSEG_CASES(32):
+ case VSOXSEG_CASES(32):
+ case VSSEG_CASES(64):
+ case VSSSEG_CASES(64):
+ case VSUXSEG_CASES(64):
+ case VSOXSEG_CASES(64):
+ return true;
+ default:
+ return false;
+ }
+}
+
std::optional<MachineOperand>
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
std::optional<MachineOperand> CommonVL;
@@ -1475,8 +1503,13 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
if (isTupleInsertInstr(UserMI, *MRI)) {
LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
- Worklist.insert_range(llvm::make_pointer_range(
- MRI->use_operands(UserMI.getOperand(0).getReg())));
+ for (MachineOperand &UseOp :
+ MRI->use_operands(UserMI.getOperand(0).getReg())) {
+ const MachineInstr &CandidateMI = *UseOp.getParent();
+ if (CandidateMI.getOpcode() == RISCV::INSERT_SUBREG ||
+ isSegmentedStoreInstr(CandidateMI))
+ Worklist.insert(&UseOp);
+ }
continue;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 0acdca91ee84c..b5c17a21e00ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -661,3 +661,35 @@ body: |
%y:vrnov0, %vl:gprnox0 = PseudoVLE8FF_V_M1_MASK $noreg, $noreg, $noreg, 1, 3 /* e8 */, 3 /* ta, ma */
PseudoVSE8_V_M1 %x, $noreg, %vl, 3 /* e8 */
...
+---
+name: insert_subreg_bitcast_no_peekthru
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v10
+
+ ; We should not peek through an INSERT_SUBREG if its user is not a segmented store or another INSERT_SUBREG.
+ ; CHECK-LABEL: name: insert_subreg_bitcast_no_peekthru
+ ; CHECK: liveins: $v8, $v9, $v10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn4m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[COPY1]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG2]], [[COPY2]], %subreg.sub_vrm1_3
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm4 = COPY [[INSERT_SUBREG3]]
+ ; CHECK-NEXT: PseudoVSE32_V_M4 [[COPY3]], $noreg, 1, 5 /* e32 */
+ %0:vr = COPY $v8
+ %1:vr = COPY $v9
+ %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %3:vr = COPY $v10
+ %6:vrn4m1 = IMPLICIT_DEF
+ %5:vrn4m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+ %7:vrn4m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+ %8:vrn4m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+ %9:vrn4m1 = INSERT_SUBREG %8, %3, %subreg.sub_vrm1_3
+ %10:vrm4 = COPY %9
+ PseudoVSE32_V_M4 %10:vrm4, $noreg, 1, 5 /* e32 */
>From 8df283bfda788b813d052cf371c11698080697f7 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 2 Sep 2025 12:09:25 -0700
Subject: [PATCH 4/9] fixup! fixup! Filter out some of the INSERT_SUBREG users
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 9cf769dd08c52..29ffbe64fbd9d 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1506,6 +1506,10 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
for (MachineOperand &UseOp :
MRI->use_operands(UserMI.getOperand(0).getReg())) {
const MachineInstr &CandidateMI = *UseOp.getParent();
+ // We should not propagate the VL if the user is not a segmented store
+ // or another INSERT_SUBREG, since VL just works differently
+ // between segmented operations (per-field) vs. other RVV ops (on the
+ // whole register group).
if (CandidateMI.getOpcode() == RISCV::INSERT_SUBREG ||
isSegmentedStoreInstr(CandidateMI))
Worklist.insert(&UseOp);
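To make the per-field vs. whole-register-group distinction in the comment above concrete (an illustrative example, not part of the patch): with vl = 4 and SEW = 32, a vsseg3e32.v reads elements [0, 4) of each of its three field registers and stores 4 three-element segments, 12 elements in total, whereas a plain vse32.v with the same vl on a register group assembled from those fields stores only 4 elements of the whole group. The number of elements demanded from each field-defining instruction therefore differs between the two kinds of users, which is why the VL is only followed through INSERT_SUBREGs whose users are all tuple inserts or segmented stores.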
>From 3bfff33ab2728ded78eb93cc71f318bd3184235e Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 2 Sep 2025 18:28:47 -0700
Subject: [PATCH 5/9] fixup! Use isTupleInsertInstr instead
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 29ffbe64fbd9d..e0e9514a369ed 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1510,7 +1510,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
// or another INSERT_SUBREG, since VL just works differently
// between segmented operations (per-field) vs. other RVV ops (on the
// whole register group).
- if (CandidateMI.getOpcode() == RISCV::INSERT_SUBREG ||
+ if (isTupleInsertInstr(CandidateMI, *MRI) ||
isSegmentedStoreInstr(CandidateMI))
Worklist.insert(&UseOp);
}
>From fdf82423fa6d1adba1535538dd0510146914c490 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 3 Sep 2025 09:32:27 -0700
Subject: [PATCH 6/9] fixup! Stop propagation if any user of the INSERT_SUBREG
 is not eligible
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 7 ++++---
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 8 +++++++-
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e0e9514a369ed..37e997ec62875 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1510,9 +1510,10 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
// or another INSERT_SUBREG, since VL just works differently
// between segmented operations (per-field) vs. other RVV ops (on the
// whole register group).
- if (isTupleInsertInstr(CandidateMI, *MRI) ||
- isSegmentedStoreInstr(CandidateMI))
- Worklist.insert(&UseOp);
+ if (!isTupleInsertInstr(CandidateMI, *MRI) &&
+ !isSegmentedStoreInstr(CandidateMI))
+ return std::nullopt;
+ Worklist.insert(&UseOp);
}
continue;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index b5c17a21e00ff..086b3203ed5b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -667,7 +667,7 @@ body: |
bb.0:
liveins: $v8, $v9, $v10
- ; We should not peek through an INSERT_SUBREG if its user is not a segmented store or another INSERT_SUBREG.
+ ; We should not peek through an INSERT_SUBREG if any of its users is not a segmented store or another INSERT_SUBREG.
; CHECK-LABEL: name: insert_subreg_bitcast_no_peekthru
; CHECK: liveins: $v8, $v9, $v10
; CHECK-NEXT: {{ $}}
@@ -682,6 +682,9 @@ body: |
; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG2]], [[COPY2]], %subreg.sub_vrm1_3
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm4 = COPY [[INSERT_SUBREG3]]
; CHECK-NEXT: PseudoVSE32_V_M4 [[COPY3]], $noreg, 1, 5 /* e32 */
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, [[PseudoVADD_VV_M1_]], $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: $v10 = COPY [[PseudoVADD_VV_M1_1]]
+ ; CHECK-NEXT: PseudoRET implicit $v10
%0:vr = COPY $v8
%1:vr = COPY $v9
%2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
@@ -693,3 +696,6 @@ body: |
%9:vrn4m1 = INSERT_SUBREG %8, %3, %subreg.sub_vrm1_3
%10:vrm4 = COPY %9
PseudoVSE32_V_M4 %10:vrm4, $noreg, 1, 5 /* e32 */
+ %11:vr = PseudoVADD_VV_M1 $noreg, %2, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+ $v10 = COPY %11
+ PseudoRET implicit $v10
>From 39bc8d632aadb892f7520b1298a8e0bd8ca1d781 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 3 Sep 2025 10:08:26 -0700
Subject: [PATCH 7/9] fixup! Check tuple register class using TSFlags
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 49 ++++++----------------
1 file changed, 13 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 37e997ec62875..101a0167e76b7 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1413,44 +1413,21 @@ static bool isTupleInsertInstr(const MachineInstr &MI,
return false;
const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
- // Check whether it was lowered with the correct subreg index.
+ if (!RISCVRI::isVRegClass(DstRC->TSFlags))
+ return false;
+ unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
+ if (NF < 2)
+ return false;
+
+ // Check whether INSERT_SUBREG was lowered with the correct subreg index.
+ auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
+ [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
+ assert(!IsFractional && "unexpected LMUL for tuple register classes");
[[maybe_unused]] const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
[[maybe_unused]] unsigned SubRegIdx = MI.getOperand(3).getImm();
- switch (DstRC->getID()) {
- case RISCV::VRN2M1RegClassID:
- case RISCV::VRN2M1NoV0RegClassID:
- case RISCV::VRN3M1RegClassID:
- case RISCV::VRN3M1NoV0RegClassID:
- case RISCV::VRN4M1RegClassID:
- case RISCV::VRN4M1NoV0RegClassID:
- case RISCV::VRN5M1RegClassID:
- case RISCV::VRN5M1NoV0RegClassID:
- case RISCV::VRN6M1RegClassID:
- case RISCV::VRN6M1NoV0RegClassID:
- case RISCV::VRN7M1RegClassID:
- case RISCV::VRN7M1NoV0RegClassID:
- case RISCV::VRN8M1RegClassID:
- case RISCV::VRN8M1NoV0RegClassID:
- assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock &&
- "unexpected subreg index for VRM1 sub-register");
- return true;
- case RISCV::VRN2M2RegClassID:
- case RISCV::VRN2M2NoV0RegClassID:
- case RISCV::VRN3M2RegClassID:
- case RISCV::VRN3M2NoV0RegClassID:
- case RISCV::VRN4M2RegClassID:
- case RISCV::VRN4M2NoV0RegClassID:
- assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 2 &&
- "unexpected subreg index for VRM2 sub-register");
- return true;
- case RISCV::VRN2M4RegClassID:
- case RISCV::VRN2M4NoV0RegClassID:
- assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 4 &&
- "unexpected subreg index for VRM4 sub-register");
- return true;
- default:
- return false;
- }
+ assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul &&
+ "unexpected subreg index of tuple register class");
+ return true;
}
static bool isSegmentedStoreInstr(const MachineInstr &MI) {
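As a worked example of the new TSFlags-based check (values filled in by hand for one concrete class; the helper names are the ones used in the patch), consider an INSERT_SUBREG whose destination register class is VRN3M1:

    // RISCVRI::isVRegClass(TSFlags) -> true
    // RISCVRI::getNF(TSFlags)       -> 3      (NF >= 2, so this is a tuple class)
    // RISCVRI::getLMul(TSFlags)     -> LMUL_1 (decodes to LMul == 1, not fractional)
    //
    // so the subreg index must span RVVBitsPerBlock * 1 bits, i.e. one of the
    // sub_vrm1_* sub-registers -- the same condition the removed switch over
    // register-class IDs asserted case by case.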
>From 06b75b882b832f5ba16126cdab1bec937a5b10ee Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Thu, 4 Sep 2025 10:41:49 -0700
Subject: [PATCH 8/9] fixup! Address review comments
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 16 +++++-----------
llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 15 +++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 16 ++++++++++++++++
3 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 101a0167e76b7..4d4f1db215220 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1413,29 +1413,23 @@ static bool isTupleInsertInstr(const MachineInstr &MI,
return false;
const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
if (!RISCVRI::isVRegClass(DstRC->TSFlags))
return false;
unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
if (NF < 2)
return false;
- // Check whether INSERT_SUBREG was lowered with the correct subreg index.
+ // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
+ unsigned SubRegIdx = MI.getOperand(3).getImm();
[[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
assert(!IsFractional && "unexpected LMUL for tuple register classes");
- [[maybe_unused]] const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- [[maybe_unused]] unsigned SubRegIdx = MI.getOperand(3).getImm();
- assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul &&
- "unexpected subreg index of tuple register class");
- return true;
+ return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
}
static bool isSegmentedStoreInstr(const MachineInstr &MI) {
- const RISCVVPseudosTable::PseudoInfo *RVV =
- RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
- if (!RVV)
- return false;
- switch (RVV->BaseInstr) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
case VSSEG_CASES(8):
case VSSSEG_CASES(8):
case VSUXSEG_CASES(8):
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index e6d2f133ed7fd..cd85853c2d12c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -2272,6 +2272,21 @@ body: |
PseudoVSSEG3E32_V_M1 killed %8, $noreg, 1, 6 /* e64 */
...
---
+name: vsseg3e32_v_incompatible_insert_subreg
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: vsseg3e32_v_incompatible_insert_subreg
+ ; CHECK: [[PseudoVADD_VV_M2_:%[0-9]+]]:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[PseudoVADD_VV_M2_]], %subreg.sub_vrm2_0
+ ; CHECK-NEXT: PseudoVSSEG3E32_V_M1 killed [[INSERT_SUBREG]], $noreg, 1, 5 /* e32 */
+ %2:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+ %6:vrn3m1 = IMPLICIT_DEF
+ %5:vrn3m1 = INSERT_SUBREG %6, %2, %subreg.sub_vrm2_0
+ PseudoVSSEG3E32_V_M1 killed %5, $noreg, 1, 5 /* e32 */
+...
+---
name: vssseg3e32_v
body: |
bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 6b5b984a48789..20608cd6bed87 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -222,3 +222,19 @@ define <vscale x 8 x i32> @vcompress_add(<vscale x 8 x i32> %a, <vscale x 8 x i3
%compress = call <vscale x 8 x i32> @llvm.riscv.vcompress.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i32> %add, <vscale x 8 x i1> %c, iXLen %vl)
ret <vscale x 8 x i32> %compress
}
+
+; Make sure we peek through INSERT_SUBREG of tuple registers.
+define void @segmented_store_insert_subreg(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, ptr %p, iXLen %vl) {
+; CHECK-LABEL: segmented_store_insert_subreg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v8, v10
+; CHECK-NEXT: vsseg3e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %fadd = fadd <vscale x 4 x float> %v0, %v1
+ %t0 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) poison, <vscale x 4 x float> %v0, i32 0)
+ %t1 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t0, <vscale x 4 x float> %fadd, i32 1)
+ %t2 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t1, <vscale x 4 x float> %v2, i32 2)
+ call void @llvm.riscv.vsseg3(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t2, ptr %p, iXLen %vl, iXLen 5)
+ ret void
+}
>From f32e1f4e4b5b2b70cc766ec46ca5c5f717d33c20 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Thu, 4 Sep 2025 11:59:53 -0700
Subject: [PATCH 9/9] fixup! Update tests
---
.../test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index d394df954cbda..d7d767e600db5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -421,14 +421,13 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wx v13, v10, a2
; RV64-NEXT: vnsrl.wi v12, v10, 0
-; RV64-NEXT: vmv.x.s a2, v10
+; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t
-; RV64-NEXT: mv a0, a2
+; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
%rvl = mul nuw i32 %evl, 2
%interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)