[llvm] Reland "[RISCV] AddEdge between mask producer and user of V0 (#146855)" (PR #148566)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 13 22:37:24 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Liao Chunyu (ChunyuLiao)
<details>
<summary>Changes</summary>
The `DefMask` vector must not contain instructions that use V0.
---
Patch is 25.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148566.diff
7 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp (+20-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll (+10-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll (+18-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll (+52-56)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll (+12-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll (+72-84)
- (modified) llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll (+7-7)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
index be54a8c95a978..64bfe6a2097ea 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -10,6 +10,10 @@
// instructions and masked instructions, so that we can reduce the live range
// overlaps of mask registers.
//
+// If there are multiple mask producers followed by multiple masked
+// instructions, then at each masked instruction add dependency edges between
+// every producer and that masked instruction.
+//
// The reason why we need to do this:
// 1. When tracking register pressure, we don't track physical registers.
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -68,11 +72,26 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override {
SUnit *NearestUseV0SU = nullptr;
+ SmallVector<SUnit *, 2> DefMask;
for (SUnit &SU : DAG->SUnits) {
const MachineInstr *MI = SU.getInstr();
- if (MI->findRegisterUseOperand(RISCV::V0, TRI))
+ bool UseV0 = MI->findRegisterUseOperand(RISCV::V0, TRI);
+ if (isSoleUseCopyToV0(SU) && !UseV0)
+ DefMask.push_back(&SU);
+
+ if (UseV0) {
NearestUseV0SU = &SU;
+ // Copy may not be a real use, so skip it here.
+ if (DefMask.size() > 1 && !MI->isCopy())
+ for (SUnit *Def : DefMask)
+ if (DAG->canAddEdge(Def, &SU))
+ DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
+
+ if (!DefMask.empty())
+ DefMask.erase(DefMask.begin());
+ }
+
if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
// For LMUL=8 cases, there will be more possibilities to spill.
// FIXME: We should use RegPressureTracker to do fine-grained
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 0d8aff306252e..2d4fce68f9545 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v0, v6, a0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a0
; CHECK-NEXT: vslidedown.vx v5, v6, a0
-; CHECK-NEXT: vslidedown.vx v4, v7, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -425,13 +425,15 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v5, a1
-; CHECK-NEXT: vslidedown.vx v5, v7, a1
-; CHECK-NEXT: vslidedown.vx v4, v6, a1
-; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v6, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v5
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: addi a2, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 796f8dde58f47..15417da962bd3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -139,21 +139,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a3, sp, 64
; RV32-NEXT: vl8r.v v8, (a0)
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: vl8r.v v24, (a0)
+; RV32-NEXT: vl8r.v v16, (a0)
; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: add a1, a3, a1
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: vmseq.vi v8, v24, 0
-; RV32-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32-NEXT: vs8r.v v24, (a3)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v16, 1, v0
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: vs8r.v v24, (a0)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vs8r.v v8, (a2)
; RV32-NEXT: lbu a0, 0(a1)
; RV32-NEXT: addi sp, s0, -80
@@ -179,21 +178,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: addi a3, sp, 64
; RV64-NEXT: vl8r.v v8, (a0)
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: vl8r.v v24, (a0)
+; RV64-NEXT: vl8r.v v16, (a0)
; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: add a1, a3, a1
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: vmseq.vi v8, v24, 0
-; RV64-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64-NEXT: vs8r.v v24, (a3)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v16, 1, v0
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: add a2, a0, a2
+; RV64-NEXT: vs8r.v v24, (a0)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vs8r.v v8, (a2)
; RV64-NEXT: lbu a0, 0(a1)
; RV64-NEXT: addi sp, s0, -80
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 2587411566a3f..fb070b24a4f34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -324,24 +324,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT: addi s0, sp, 384
; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: zext.b a1, a1
-; RV32-NEXT: mv a2, sp
-; RV32-NEXT: li a3, 128
-; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: vle8.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle8.v v16, (a0)
-; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v24, 0
-; RV32-NEXT: vmseq.vi v8, v16, 0
-; RV32-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32-NEXT: vse8.v v16, (a2)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: lbu a0, 0(a1)
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: zext.b a0, a1
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: vse8.v v24, (a1)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: lbu a0, 0(a0)
; RV32-NEXT: addi sp, s0, -384
; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -355,24 +354,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT: addi s0, sp, 384
; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: zext.b a1, a1
-; RV64-NEXT: mv a2, sp
-; RV64-NEXT: li a3, 128
-; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vle8.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle8.v v16, (a0)
-; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v24, 0
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64-NEXT: vse8.v v16, (a2)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: lbu a0, 0(a1)
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: zext.b a0, a1
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: vse8.v v24, (a1)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: lbu a0, 0(a0)
; RV64-NEXT: addi sp, s0, -384
; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -386,24 +384,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32ZBS-NEXT: addi s0, sp, 384
; RV32ZBS-NEXT: andi sp, sp, -128
-; RV32ZBS-NEXT: zext.b a1, a1
-; RV32ZBS-NEXT: mv a2, sp
-; RV32ZBS-NEXT: li a3, 128
-; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32ZBS-NEXT: li a2, 128
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32ZBS-NEXT: vle8.v v8, (a0)
; RV32ZBS-NEXT: addi a0, a0, 128
; RV32ZBS-NEXT: vle8.v v16, (a0)
-; RV32ZBS-NEXT: add a1, a2, a1
; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV32ZBS-NEXT: vmv.v.i v24, 0
-; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32ZBS-NEXT: vse8.v v16, (a2)
-; RV32ZBS-NEXT: vmv1r.v v0, v8
-; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32ZBS-NEXT: addi a0, sp, 128
-; RV32ZBS-NEXT: vse8.v v8, (a0)
-; RV32ZBS-NEXT: lbu a0, 0(a1)
+; RV32ZBS-NEXT: vmv.v.i v8, 0
+; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV32ZBS-NEXT: zext.b a0, a1
+; RV32ZBS-NEXT: mv a1, sp
+; RV32ZBS-NEXT: add a0, a1, a0
+; RV32ZBS-NEXT: vse8.v v24, (a1)
+; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32ZBS-NEXT: addi a1, sp, 128
+; RV32ZBS-NEXT: vse8.v v8, (a1)
+; RV32ZBS-NEXT: lbu a0, 0(a0)
; RV32ZBS-NEXT: addi sp, s0, -384
; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -417,24 +414,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64ZBS-NEXT: addi s0, sp, 384
; RV64ZBS-NEXT: andi sp, sp, -128
-; RV64ZBS-NEXT: zext.b a1, a1
-; RV64ZBS-NEXT: mv a2, sp
-; RV64ZBS-NEXT: li a3, 128
-; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64ZBS-NEXT: li a2, 128
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64ZBS-NEXT: vle8.v v8, (a0)
; RV64ZBS-NEXT: addi a0, a0, 128
; RV64ZBS-NEXT: vle8.v v16, (a0)
-; RV64ZBS-NEXT: add a1, a2, a1
; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV64ZBS-NEXT: vmv.v.i v24, 0
-; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64ZBS-NEXT: vse8.v v16, (a2)
-; RV64ZBS-NEXT: vmv1r.v v0, v8
-; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64ZBS-NEXT: addi a0, sp, 128
-; RV64ZBS-NEXT: vse8.v v8, (a0)
-; RV64ZBS-NEXT: lbu a0, 0(a1)
+; RV64ZBS-NEXT: vmv.v.i v8, 0
+; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV64ZBS-NEXT: zext.b a0, a1
+; RV64ZBS-NEXT: mv a1, sp
+; RV64ZBS-NEXT: add a0, a1, a0
+; RV64ZBS-NEXT: vse8.v v24, (a1)
+; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64ZBS-NEXT: addi a1, sp, 128
+; RV64ZBS-NEXT: vse8.v v8, (a1)
+; RV64ZBS-NEXT: lbu a0, 0(a0)
; RV64ZBS-NEXT: addi sp, s0, -384
; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index c11319ff335fd..67584ba8a82cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -143,16 +143,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 5, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 4, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 4, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
@@ -188,16 +187,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 6, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 6, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 6, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index 206838917d004..ad2ed47e67e64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -153,20 +153,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vlm.v v10, (a2)
-; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; NO_FOLDING-NEXT: vmv.v.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; NO_FOLDING-NEXT: vmv.v.v v0, v10
-; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
-; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v10, v11, v12
+; NO_FOLDING-NEXT: vsub.vv v11, v11, v9
; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
@@ -174,20 +173,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vlm.v v10, (a2)
-; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; FOLDING-NEXT: vmv.v.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; FOLDING-NEXT: vmv.v.v v0, v10
-; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; FOLDING-NEXT: vmul.vv v9, v12, v9
-; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; FOLDING-NEXT: vmul.vv v10, v11, v12
+; FOLDING-NEXT: vsub.vv v11, v11, v9
; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v9
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -209,20 +207,19 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vlm.v v10, (a2)
-; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; NO_FOLDING-NEXT: vmv1r.v v0, v10
-; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
-; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v10, v11, v12
+; NO_FOLDING-NEXT: vsub.vv v11, v11, v9
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
@@ -230,20 +227,19 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vlm.v v10, (a2)
-; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; FOLDING-NEXT: vmv1r.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; FOLDING-NEXT: vmv1r.v v0, v10
-; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; FOLDING-NEXT: vmul.vv v9, v12, v9
-; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; FOLDING-NEXT: vmul.vv v10, v11, v12
+; FOLDING-NEXT: vsub.vv v11, v11, v9
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v9
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -444,16 +440,14 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148566
More information about the llvm-commits
mailing list