[llvm] [RISCV] Keep same SEW/LMUL ratio if possible in forward transfer (PR #69788)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 20 21:26:39 PDT 2023


llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

<details>
<summary>Changes</summary>

For instructions like vmv.s.x and friends that don't care about LMUL or the
SEW/LMUL ratio, we can change the LMUL in their state so that it keeps the same
SEW/LMUL ratio as the previous state. This allows us to avoid more VL toggles
further down the line, since vsetvli zero, zero can only be used when the
SEW/LMUL ratio stays the same.

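For illustration, here is a minimal standalone sketch of the arithmetic involved (the helper names below are made up for this example and are not the LLVM API). It reproduces the case in the first test diff below, where a vmv.s.x that used to force `vsetivli zero, 1, e16, mf4` keeps the preceding e8, m1 state's ratio of 8 and becomes `vsetvli zero, zero, e16, m2`:

```c++
#include <cassert>
#include <cstdio>
#include <optional>
#include <string>

// Sketch only (not the LLVM API): given the element width SEW an instruction
// actually needs and the SEW/LMUL ratio of the previous vsetvli state, compute
// the LMUL (scaled by 8, i.e. 3 fractional bits) that preserves that ratio.
// Returns nothing if the required LMUL falls outside the legal [1/8, 8] range.
static std::optional<unsigned> lmulTimes8ForSameRatio(unsigned SEW,
                                                      unsigned PrevRatio) {
  unsigned LMULTimes8 = (SEW * 8) / PrevRatio;
  if (LMULTimes8 < 1 || LMULTimes8 > 64)
    return std::nullopt;
  return LMULTimes8;
}

// Render the scaled LMUL the way vsetvli spells it (mf8 .. m8).
static std::string lmulName(unsigned LMULTimes8) {
  return LMULTimes8 < 8 ? "mf" + std::to_string(8 / LMULTimes8)
                        : "m" + std::to_string(LMULTimes8 / 8);
}

int main() {
  // Previous state was e8, m1, so SEW/LMUL = 8 / 1 = 8.
  unsigned PrevRatio = 8;
  // A vmv.s.x with SEW=16 doesn't care about LMUL; keep the ratio at 8.
  auto LMULTimes8 = lmulTimes8ForSameRatio(/*SEW=*/16, PrevRatio);
  assert(LMULTimes8 && *LMULTimes8 == 16); // LMUL = 2
  // Prints "vsetvli zero, zero, e16, m2" -- no VL toggle needed.
  std::printf("vsetvli zero, zero, e16, %s\n", lmulName(*LMULTimes8).c_str());
  return 0;
}
```
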
This is an alternative approach to the idea in #69259, but note that the two
approaches don't catch exactly the same test cases.


---

Patch is 88.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69788.diff


17 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (+53-55) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+107-66) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+26-39) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll (+56-84) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll (+7-8) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll (+12-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+2-3) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll (+1-2) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 4c99da1244bf50c..ec42622e18792b6 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1042,6 +1042,26 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
   if (!RISCVII::hasVLOp(TSFlags))
     return;
 
+  // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+  // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+  // places.
+  DemandedFields Demanded = getDemanded(MI, MRI);
+  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
+      PrevInfo.isValid() && !Info.isUnknown() && !PrevInfo.isUnknown() &&
+      !Info.hasSameVLMAX(PrevInfo)) {
+    unsigned SEW = Info.getSEW();
+    // Fixed point value with 3 fractional bits.
+    unsigned NewRatio = (SEW * 8) / PrevInfo.getSEWLMULRatio();
+    if (NewRatio >= 1 && NewRatio <= 64) {
+      bool Fractional = NewRatio < 8;
+      RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
+          Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
+      unsigned VType = Info.encodeVTYPE();
+      Info.setVTYPE(NewVLMul, SEW, RISCVVType::isTailAgnostic(VType),
+                    RISCVVType::isMaskAgnostic(VType));
+    }
+  }
+
   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
   // VL > 0. We can discard the user requested AVL and just use the last
   // one if we can prove it equally zero.  This removes a vsetvli entirely
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 25177734325ce15..83edd49bc963767 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -22,9 +22,9 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vadd.vi v12, v11, -16
 ; CHECK-NEXT:    li a0, -256
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT:    vrgather.vv v9, v8, v12, v0.t
 ; CHECK-NEXT:    vmsne.vi v9, v9, 0
 ; CHECK-NEXT:    vadd.vi v12, v11, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index cbcca9d2696f4ba..3cc7371c1ce9ac4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    li a2, 6
-; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a2
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v12, (a1)
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e9412019a0dec8e..85939377c6db08e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -567,9 +567,9 @@ define void @buildvec_seq_v9i8(ptr %x) {
 ; CHECK-NEXT:    vmv.v.i v8, 3
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    li a1, 146
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT:    vsetivli zero, 9, e8, m1, ta, ma
 ; CHECK-NEXT:    vse8.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index e6868abdb5b1d71..c95d144a970895c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1101,21 +1101,20 @@ define void @urem_v2i64(ptr %x, ptr %y) {
 define void @mulhu_v16i8(ptr %x) {
 ; CHECK-LABEL: mulhu_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    lui a1, 3
 ; CHECK-NEXT:    addi a1, a1, -2044
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v9, 0
 ; CHECK-NEXT:    li a1, -128
 ; CHECK-NEXT:    vmerge.vxm v10, v9, a1, v0
 ; CHECK-NEXT:    lui a1, 1
 ; CHECK-NEXT:    addi a2, a1, 32
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a2
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    lui a2, %hi(.LCPI65_0)
 ; CHECK-NEXT:    addi a2, a2, %lo(.LCPI65_0)
 ; CHECK-NEXT:    vle8.v v11, (a2)
@@ -1126,21 +1125,21 @@ define void @mulhu_v16i8(ptr %x) {
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v10
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    li a2, 513
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a2
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v9, 4
 ; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
 ; CHECK-NEXT:    addi a1, a1, 78
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmerge.vim v9, v9, 3, v0
 ; CHECK-NEXT:    lui a1, 8
 ; CHECK-NEXT:    addi a1, a1, 304
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
 ; CHECK-NEXT:    vsrl.vv v8, v8, v9
 ; CHECK-NEXT:    vse8.v v8, (a0)
@@ -1204,9 +1203,9 @@ define void @mulhu_v6i16(ptr %x) {
 ; CHECK-NEXT:    vdivu.vv v9, v10, v9
 ; CHECK-NEXT:    lui a1, 45217
 ; CHECK-NEXT:    addi a1, a1, -1785
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v10, a1
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v11, v10
 ; CHECK-NEXT:    vdivu.vv v8, v8, v11
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
@@ -1309,10 +1308,10 @@ define void @mulhs_v16i8(ptr %x) {
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    lui a1, 5
 ; CHECK-NEXT:    addi a1, a1, -1452
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    li a1, 57
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v9
 ; CHECK-NEXT:    vmv.v.i v9, 7
@@ -1367,9 +1366,9 @@ define void @mulhs_v6i16(ptr %x) {
 ; CHECK-NEXT:    vdiv.vv v9, v9, v10
 ; CHECK-NEXT:    lui a1, 1020016
 ; CHECK-NEXT:    addi a1, a1, 2041
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v10, a1
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v11, v10
 ; CHECK-NEXT:    vdiv.vv v8, v8, v11
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
@@ -4872,21 +4871,21 @@ define void @mulhu_v32i8(ptr %x) {
 ; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; LMULMAX2-NEXT:    vle8.v v8, (a0)
 ; LMULMAX2-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-NEXT:    lui a2, 163907
-; LMULMAX2-NEXT:    addi a2, a2, -2044
-; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-NEXT:    li a2, -128
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmerge.vxm v12, v10, a2, v0
-; LMULMAX2-NEXT:    lui a2, 66049
-; LMULMAX2-NEXT:    addi a2, a2, 32
-; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    lui a2, %hi(.LCPI181_0)
-; LMULMAX2-NEXT:    addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX2-NEXT:    vle8.v v14, (a2)
+; LMULMAX2-NEXT:    lui a1, 163907
+; LMULMAX2-NEXT:    addi a1, a1, -2044
+; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    li a1, -128
+; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vmerge.vxm v12, v10, a1, v0
+; LMULMAX2-NEXT:    lui a1, 66049
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; LMULMAX2-NEXT:    lui a1, %hi(.LCPI181_0)
+; LMULMAX2-NEXT:    addi a1, a1, %lo(.LCPI181_0)
+; LMULMAX2-NEXT:    vle8.v v14, (a1)
 ; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
 ; LMULMAX2-NEXT:    vsrl.vv v10, v8, v10
 ; LMULMAX2-NEXT:    vmulhu.vv v10, v10, v14
@@ -4894,23 +4893,23 @@ define void @mulhu_v32i8(ptr %x) {
 ; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
 ; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-NEXT:    vmv.v.i v10, 4
-; LMULMAX2-NEXT:    lui a2, 8208
-; LMULMAX2-NEXT:    addi a2, a2, 513
-; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    lui a1, 8208
+; LMULMAX2-NEXT:    addi a1, a1, 513
+; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-NEXT:    lui a2, 66785
-; LMULMAX2-NEXT:    addi a2, a2, 78
-; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    lui a1, 66785
+; LMULMAX2-NEXT:    addi a1, a1, 78
+; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; LMULMAX2-NEXT:    vmerge.vim v10, v10, 3, v0
-; LMULMAX2-NEXT:    lui a2, 529160
-; LMULMAX2-NEXT:    addi a2, a2, 304
-; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    lui a1, 529160
+; LMULMAX2-NEXT:    addi a1, a1, 304
+; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; LMULMAX2-NEXT:    vmerge.vim v10, v10, 2, v0
 ; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
 ; LMULMAX2-NEXT:    vse8.v v8, (a0)
@@ -5250,11 +5249,11 @@ define void @mulhs_v32i8(ptr %x) {
 ; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; LMULMAX2-NEXT:    vle8.v v8, (a0)
 ; LMULMAX2-NEXT:    vmv.v.i v10, 7
-; LMULMAX2-NEXT:    lui a2, 304453
-; LMULMAX2-NEXT:    addi a2, a2, -1452
-; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    lui a1, 304453
+; LMULMAX2-NEXT:    addi a1, a1, -1452
+; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
 ; LMULMAX2-NEXT:    li a1, -123
 ; LMULMAX2-NEXT:    vmv.v.x v12, a1
@@ -5267,15 +5266,14 @@ define void @mulhs_v32i8(ptr %x) {
 ;
 ; LMULMAX1-LABEL: mulhs_v32i8:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; LMULMAX1-NEXT:    vle8.v v8, (a0)
 ; LMULMAX1-NEXT:    addi a1, a0, 16
 ; LMULMAX1-NEXT:    vle8.v v9, (a1)
 ; LMULMAX1-NEXT:    lui a2, 5
 ; LMULMAX1-NEXT:    addi a2, a2, -1452
-; LMULMAX1-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; LMULMAX1-NEXT:    vmv.s.x v0, a2
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; LMULMAX1-NEXT:    vmv.v.i v10, -9
 ; LMULMAX1-NEXT:    vmerge.vim v10, v10, 9, v0
 ; LMULMAX1-NEXT:    vdivu.vv v9, v9, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 6ee0e4525f5ec72..728cf18e1a77d8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -536,15 +536,16 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
 ; RV64ZVE32F-NEXT:  .LBB9_5: # %cond.load
 ; RV64ZVE32F-NEXT:    ld a2, 0(a0)
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 2
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB9_2
 ; RV64ZVE32F-NEXT:  .LBB9_6: # %cond.load1
 ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32F-NEXT:    andi a2, a1, 4
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB9_3
@@ -636,7 +637,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
 ; RV64ZVE32F-NEXT:  .LBB11_9: # %cond.load
 ; RV64ZVE32F-NEXT:    ld a2, 0(a0)
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 2
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB11_2
@@ -728,7 +729,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT:  .LBB12_2: # %else
 ; RV64ZVE32F-NEXT:    andi a2, a1, 2
@@ -1259,15 +1260,16 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
 ; RV64ZVE32F-NEXT:  .LBB20_5: # %cond.load
 ; RV64ZVE32F-NEXT:    ld a2, 0(a0)
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 2
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB20_2
 ; RV64ZVE32F-NEXT:  .LBB20_6: # %cond.load1
 ; RV64ZVE32F-NEXT:    ld a2, 8(a0)
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32F-NEXT:    andi a2, a1, 4
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB20_3
@@ -1359,7 +1361,7 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
 ; RV64ZVE32F-NEXT:  .LBB22_9: # %cond.load
 ; RV64ZVE32F-NEXT:    ld a2, 0(a0)
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 2
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB22_2
@@ -1454,7 +1456,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT:  .LBB23_2: # %else
 ; RV64ZVE32F-NEXT:    andi a2, a1, 2
@@ -1466,8 +1468,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
 ; RV64ZVE32F-NEXT:  .LBB23_4: # %else2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
@@ -1492,8 +1495,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
 ; RV64ZVE32F-NEXT:  .LBB23_9: # %else14
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -1511,8 +1515,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 8
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB23_6
@@ -1523,8 +1528,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB23_7
@@ -1545,8 +1551,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
-; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
 ; RV64ZVE32F-NEXT:    andi a1, a1, -128
 ; RV64ZVE32F-NEXT:    beqz a1, .LBB23_11
@@ -1557,8 +1564,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    slli a1, a1, 1
 ; RV64ZVE32F-NEXT:    add a0, a0, a1
 ; RV64ZVE32F-NEXT:    lh a0, 0(a0)
-; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
 ; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
 ; RV64ZVE32F-NEXT:    ret
@@ -1599,7 +1607,7 @@ define <8 x i16> @...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/69788

