[llvm] aee21c3 - [RISCV] AddEdge between mask producer and user of V0 (#146855)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 02:57:05 PDT 2025
Author: Liao Chunyu
Date: 2025-07-11T17:57:01+08:00
New Revision: aee21c368b41cd5f7765a31b9dbe77f2bffadd4e
URL: https://github.com/llvm/llvm-project/commit/aee21c368b41cd5f7765a31b9dbe77f2bffadd4e
DIFF: https://github.com/llvm/llvm-project/commit/aee21c368b41cd5f7765a31b9dbe77f2bffadd4e.diff
LOG: [RISCV] AddEdge between mask producer and user of V0 (#146855)
If there are multiple mask producers followed by multiple
masked consumers, a move (vmv* v0, vx) may be generated to
restore a saved mask into V0.
By moving a mask's producer to after the previous mask's use,
the spill can be eliminated and the move can be removed.
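
To make the scheduling intent concrete, here is a condensed, hedged sketch of the
kind of ScheduleDAGMutation logic this patch extends. It is illustrative rather than
a copy of RISCVVectorMaskDAGMutation.cpp: the class name is invented, RISCV::V0 and
the TRI member come from the RISC-V backend, isSoleUseCopyToV0() is the helper
already defined in that file (its definition is not shown in the hunk below), and
the real pass additionally tracks NearestUseV0SU for its pre-existing
single-producer heuristic, which the sketch omits.

// Illustrative sketch only. Collect pending mask producers (instructions
// whose sole use is a COPY into V0); once an instruction genuinely reads
// V0, add artificial scheduling edges so every still-pending producer is
// forced after that masked use. This keeps only one mask live around V0
// at a time, avoiding vmv1r.v moves and mask spills.
class MaskProducerReorderMutation : public ScheduleDAGMutation { // hypothetical name
  const TargetRegisterInfo *TRI; // supplied by the backend

public:
  explicit MaskProducerReorderMutation(const TargetRegisterInfo *TRI) : TRI(TRI) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    SmallVector<SUnit *, 2> PendingProducers;
    for (SUnit &SU : DAG->SUnits) {
      const MachineInstr *MI = SU.getInstr();

      // Remember producers whose sole use is a COPY into V0
      // (isSoleUseCopyToV0 is defined earlier in the real file).
      if (isSoleUseCopyToV0(SU))
        PendingProducers.push_back(&SU);

      if (MI->findRegisterUseOperand(RISCV::V0, TRI)) {
        // A COPY that reads V0 is not a real masked use, so only add
        // edges at genuine masked instructions.
        if (PendingProducers.size() > 1 && !MI->isCopy())
          for (SUnit *Producer : PendingProducers)
            if (DAG->canAddEdge(Producer, &SU))
              // The producer becomes a successor of the masked use,
              // i.e. it is scheduled after it.
              DAG->addEdge(Producer, SDep(&SU, SDep::Artificial));

        // The earliest pending producer is consumed by this use.
        if (!PendingProducers.empty())
          PendingProducers.erase(PendingProducers.begin());
      }
    }
  }
};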
Added:
Modified:
llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
index be54a8c95a978..5464612d86bee 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -10,6 +10,10 @@
// instructions and masked instructions, so that we can reduce the live range
// overlaps of mask registers.
//
+// If there are multiple mask producers followed by multiple masked
+// instructions, then at each masked instruction we add dependency edges
+// between every pending mask producer and that masked instruction.
+//
// The reason why we need to do this:
// 1. When tracking register pressure, we don't track physical registers.
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -68,11 +72,25 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override {
SUnit *NearestUseV0SU = nullptr;
+ SmallVector<SUnit *, 2> DefMask;
for (SUnit &SU : DAG->SUnits) {
const MachineInstr *MI = SU.getInstr();
- if (MI->findRegisterUseOperand(RISCV::V0, TRI))
+ if (isSoleUseCopyToV0(SU))
+ DefMask.push_back(&SU);
+
+ if (MI->findRegisterUseOperand(RISCV::V0, TRI)) {
NearestUseV0SU = &SU;
+ // Copy may not be a real use, so skip it here.
+ if (DefMask.size() > 1 && !MI->isCopy())
+ for (SUnit *Def : DefMask)
+ if (DAG->canAddEdge(Def, &SU))
+ DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
+
+ if (!DefMask.empty())
+ DefMask.erase(DefMask.begin());
+ }
+
if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
// For LMUL=8 cases, there will be more possibilities to spill.
// FIXME: We should use RegPressureTracker to do fine-grained
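
For context, a DAG mutation only takes effect once it is hooked into the machine
scheduler. The snippet below is a paraphrased sketch of that hookup, not quoted from
RISCVTargetMachine.cpp: it assumes the factory createRISCVVectorMaskDAGMutation()
that this file is expected to export, and it omits any other mutations the target
may also register.

// Sketch (paraphrased): register the mutation when the scheduler's DAG is
// created, so apply() runs on every scheduling region before scheduling.
ScheduleDAGInstrs *
RISCVPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Factory assumed to be provided by RISCVVectorMaskDAGMutation.cpp.
  DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
  return DAG;
}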
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 0d8aff306252e..2d4fce68f9545 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v0, v6, a0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a0
; CHECK-NEXT: vslidedown.vx v5, v6, a0
-; CHECK-NEXT: vslidedown.vx v4, v7, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -425,13 +425,15 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v5, a1
-; CHECK-NEXT: vslidedown.vx v5, v7, a1
-; CHECK-NEXT: vslidedown.vx v4, v6, a1
-; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v6, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v5
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: addi a2, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 796f8dde58f47..15417da962bd3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -139,21 +139,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a3, sp, 64
; RV32-NEXT: vl8r.v v8, (a0)
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: vl8r.v v24, (a0)
+; RV32-NEXT: vl8r.v v16, (a0)
; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: add a1, a3, a1
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: vmseq.vi v8, v24, 0
-; RV32-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32-NEXT: vs8r.v v24, (a3)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v16, 1, v0
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: vs8r.v v24, (a0)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vs8r.v v8, (a2)
; RV32-NEXT: lbu a0, 0(a1)
; RV32-NEXT: addi sp, s0, -80
@@ -179,21 +178,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: addi a3, sp, 64
; RV64-NEXT: vl8r.v v8, (a0)
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: vl8r.v v24, (a0)
+; RV64-NEXT: vl8r.v v16, (a0)
; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: add a1, a3, a1
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: vmseq.vi v8, v24, 0
-; RV64-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64-NEXT: vs8r.v v24, (a3)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v16, 1, v0
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: add a2, a0, a2
+; RV64-NEXT: vs8r.v v24, (a0)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vs8r.v v8, (a2)
; RV64-NEXT: lbu a0, 0(a1)
; RV64-NEXT: addi sp, s0, -80
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 2587411566a3f..fb070b24a4f34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -324,24 +324,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT: addi s0, sp, 384
; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: zext.b a1, a1
-; RV32-NEXT: mv a2, sp
-; RV32-NEXT: li a3, 128
-; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: vle8.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle8.v v16, (a0)
-; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v24, 0
-; RV32-NEXT: vmseq.vi v8, v16, 0
-; RV32-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32-NEXT: vse8.v v16, (a2)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: lbu a0, 0(a1)
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: zext.b a0, a1
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: vse8.v v24, (a1)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: lbu a0, 0(a0)
; RV32-NEXT: addi sp, s0, -384
; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -355,24 +354,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT: addi s0, sp, 384
; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: zext.b a1, a1
-; RV64-NEXT: mv a2, sp
-; RV64-NEXT: li a3, 128
-; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vle8.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle8.v v16, (a0)
-; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v24, 0
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64-NEXT: vse8.v v16, (a2)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: lbu a0, 0(a1)
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: zext.b a0, a1
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: vse8.v v24, (a1)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: lbu a0, 0(a0)
; RV64-NEXT: addi sp, s0, -384
; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -386,24 +384,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32ZBS-NEXT: addi s0, sp, 384
; RV32ZBS-NEXT: andi sp, sp, -128
-; RV32ZBS-NEXT: zext.b a1, a1
-; RV32ZBS-NEXT: mv a2, sp
-; RV32ZBS-NEXT: li a3, 128
-; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32ZBS-NEXT: li a2, 128
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32ZBS-NEXT: vle8.v v8, (a0)
; RV32ZBS-NEXT: addi a0, a0, 128
; RV32ZBS-NEXT: vle8.v v16, (a0)
-; RV32ZBS-NEXT: add a1, a2, a1
; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV32ZBS-NEXT: vmv.v.i v24, 0
-; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32ZBS-NEXT: vse8.v v16, (a2)
-; RV32ZBS-NEXT: vmv1r.v v0, v8
-; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32ZBS-NEXT: addi a0, sp, 128
-; RV32ZBS-NEXT: vse8.v v8, (a0)
-; RV32ZBS-NEXT: lbu a0, 0(a1)
+; RV32ZBS-NEXT: vmv.v.i v8, 0
+; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV32ZBS-NEXT: zext.b a0, a1
+; RV32ZBS-NEXT: mv a1, sp
+; RV32ZBS-NEXT: add a0, a1, a0
+; RV32ZBS-NEXT: vse8.v v24, (a1)
+; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32ZBS-NEXT: addi a1, sp, 128
+; RV32ZBS-NEXT: vse8.v v8, (a1)
+; RV32ZBS-NEXT: lbu a0, 0(a0)
; RV32ZBS-NEXT: addi sp, s0, -384
; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -417,24 +414,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64ZBS-NEXT: addi s0, sp, 384
; RV64ZBS-NEXT: andi sp, sp, -128
-; RV64ZBS-NEXT: zext.b a1, a1
-; RV64ZBS-NEXT: mv a2, sp
-; RV64ZBS-NEXT: li a3, 128
-; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64ZBS-NEXT: li a2, 128
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64ZBS-NEXT: vle8.v v8, (a0)
; RV64ZBS-NEXT: addi a0, a0, 128
; RV64ZBS-NEXT: vle8.v v16, (a0)
-; RV64ZBS-NEXT: add a1, a2, a1
; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV64ZBS-NEXT: vmv.v.i v24, 0
-; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64ZBS-NEXT: vse8.v v16, (a2)
-; RV64ZBS-NEXT: vmv1r.v v0, v8
-; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64ZBS-NEXT: addi a0, sp, 128
-; RV64ZBS-NEXT: vse8.v v8, (a0)
-; RV64ZBS-NEXT: lbu a0, 0(a1)
+; RV64ZBS-NEXT: vmv.v.i v8, 0
+; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV64ZBS-NEXT: zext.b a0, a1
+; RV64ZBS-NEXT: mv a1, sp
+; RV64ZBS-NEXT: add a0, a1, a0
+; RV64ZBS-NEXT: vse8.v v24, (a1)
+; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64ZBS-NEXT: addi a1, sp, 128
+; RV64ZBS-NEXT: vse8.v v8, (a1)
+; RV64ZBS-NEXT: lbu a0, 0(a0)
; RV64ZBS-NEXT: addi sp, s0, -384
; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index c11319ff335fd..67584ba8a82cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -143,16 +143,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 5, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 4, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 4, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
@@ -188,16 +187,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 6, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 6, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 6, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index 206838917d004..ad2ed47e67e64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -153,20 +153,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vlm.v v10, (a2)
-; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; NO_FOLDING-NEXT: vmv.v.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; NO_FOLDING-NEXT: vmv.v.v v0, v10
-; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
-; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v10, v11, v12
+; NO_FOLDING-NEXT: vsub.vv v11, v11, v9
; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
@@ -174,20 +173,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vlm.v v10, (a2)
-; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; FOLDING-NEXT: vmv.v.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; FOLDING-NEXT: vmv.v.v v0, v10
-; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; FOLDING-NEXT: vmul.vv v9, v12, v9
-; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; FOLDING-NEXT: vmul.vv v10, v11, v12
+; FOLDING-NEXT: vsub.vv v11, v11, v9
; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v9
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -209,20 +207,19 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vlm.v v10, (a2)
-; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; NO_FOLDING-NEXT: vmv1r.v v0, v10
-; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
-; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v10, v11, v12
+; NO_FOLDING-NEXT: vsub.vv v11, v11, v9
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
@@ -230,20 +227,19 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vlm.v v10, (a2)
-; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmerge.vim v11, v10, -1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vmerge.vim v12, v10, -1, v0
; FOLDING-NEXT: vmv1r.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
-; FOLDING-NEXT: vmv1r.v v0, v10
-; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
-; FOLDING-NEXT: vmul.vv v9, v12, v9
-; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmerge.vim v9, v10, -1, v0
+; FOLDING-NEXT: vmul.vv v10, v11, v12
+; FOLDING-NEXT: vsub.vv v11, v11, v9
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t
-; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vadd.vi v9, v9, -1, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v9
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -444,16 +440,14 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y,
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT: vlm.v v0, (a0)
-; NO_FOLDING-NEXT: vlm.v v8, (a2)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vmv.v.i v10, 0
-; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
-; NO_FOLDING-NEXT: vmv.v.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
-; NO_FOLDING-NEXT: vadd.vv v10, v11, v8
-; NO_FOLDING-NEXT: vsub.vv v8, v11, v8
-; NO_FOLDING-NEXT: vmv.v.v v0, v9
-; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; NO_FOLDING-NEXT: vmv.v.i v8, 0
+; NO_FOLDING-NEXT: vmerge.vim v9, v8, 1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v8, v8, 1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vadd.vv v10, v9, v8
+; NO_FOLDING-NEXT: vsub.vv v8, v9, v8
+; NO_FOLDING-NEXT: vor.vv v10, v10, v9, v0.t
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
@@ -461,16 +455,14 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y,
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT: vlm.v v0, (a0)
-; FOLDING-NEXT: vlm.v v8, (a2)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vmv.v.i v10, 0
-; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
-; FOLDING-NEXT: vmv.v.v v0, v8
-; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
-; FOLDING-NEXT: vadd.vv v10, v11, v8
-; FOLDING-NEXT: vsub.vv v8, v11, v8
-; FOLDING-NEXT: vmv.v.v v0, v9
-; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; FOLDING-NEXT: vmv.v.i v8, 0
+; FOLDING-NEXT: vmerge.vim v9, v8, 1, v0
+; FOLDING-NEXT: vlm.v v0, (a2)
+; FOLDING-NEXT: vmerge.vim v8, v8, 1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vadd.vv v10, v9, v8
+; FOLDING-NEXT: vsub.vv v8, v9, v8
+; FOLDING-NEXT: vor.vv v10, v10, v9, v0.t
; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
@@ -492,16 +484,14 @@ define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v0, (a0)
-; NO_FOLDING-NEXT: vlm.v v8, (a2)
-; NO_FOLDING-NEXT: vlm.v v9, (a1)
-; NO_FOLDING-NEXT: vmv.v.i v10, 0
-; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
-; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
-; NO_FOLDING-NEXT: vadd.vv v10, v11, v8
-; NO_FOLDING-NEXT: vsub.vv v8, v11, v8
-; NO_FOLDING-NEXT: vmv1r.v v0, v9
-; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; NO_FOLDING-NEXT: vmv.v.i v8, 0
+; NO_FOLDING-NEXT: vmerge.vim v9, v8, 1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a2)
+; NO_FOLDING-NEXT: vmerge.vim v8, v8, 1, v0
+; NO_FOLDING-NEXT: vlm.v v0, (a1)
+; NO_FOLDING-NEXT: vadd.vv v10, v9, v8
+; NO_FOLDING-NEXT: vsub.vv v8, v9, v8
+; NO_FOLDING-NEXT: vor.vv v10, v10, v9, v0.t
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
@@ -509,16 +499,14 @@ define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v0, (a0)
-; FOLDING-NEXT: vlm.v v8, (a2)
-; FOLDING-NEXT: vlm.v v9, (a1)
-; FOLDING-NEXT: vmv.v.i v10, 0
-; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
-; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
-; FOLDING-NEXT: vadd.vv v10, v11, v8
-; FOLDING-NEXT: vsub.vv v8, v11, v8
-; FOLDING-NEXT: vmv1r.v v0, v9
-; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; FOLDING-NEXT: vmv.v.i v8, 0
+; FOLDING-NEXT: vmerge.vim v9, v8, 1, v0
+; FOLDING-NEXT: vlm.v v0, (a2)
+; FOLDING-NEXT: vmerge.vim v8, v8, 1, v0
+; FOLDING-NEXT: vlm.v v0, (a1)
+; FOLDING-NEXT: vadd.vv v10, v9, v8
+; FOLDING-NEXT: vsub.vv v8, v9, v8
+; FOLDING-NEXT: vor.vv v10, v10, v9, v0.t
; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 9cdec6a9ff2e9..30044ad580143 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -494,17 +494,17 @@ define <vscale x 8 x double> @vfmerge_nzv_nxv8f64(<vscale x 8 x double> %va, <vs
define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %va, <vscale x 16 x double> %vb) {
; CHECK-LABEL: vselect_combine_regression:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmseq.vi v24, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vle64.v v8, (a0), v0.t
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vle64.v v16, (a1), v0.t
+; CHECK-NEXT: vmseq.vi v0, v24, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vle64.v v16, (a0), v0.t
; CHECK-NEXT: ret
%cond = icmp eq <vscale x 16 x i64> %va, zeroinitializer
%sel = select <vscale x 16 x i1> %cond, <vscale x 16 x double> %vb, <vscale x 16 x double> zeroinitializer