[llvm] [RISCV] Keep same SEW/LMUL ratio if possible in forward transfer (PR #69788)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 20 21:26:39 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
For instructions like vmv.s.x and friends, where we don't care about LMUL or the
SEW/LMUL ratio, we can change the LMUL in their state so that it keeps the same
SEW/LMUL ratio as the previous state. This lets us avoid more VL toggles later
down the line, i.e. emit vsetvli zero, zero, which requires that the SEW/LMUL
ratio stay the same.
This is an alternative approach to the idea in #69259, but note that the two
don't catch exactly the same test cases.
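For reference, below is a minimal standalone sketch of the ratio arithmetic the patch relies on. pickLMULTimes8 is a hypothetical helper written for illustration only, not LLVM's actual API; the patch itself works through VSETVLIInfo and RISCVVType::encodeLMUL. LMUL is represented here as a fixed-point value (LMUL * 8) so that fractional LMULs stay integral, matching the range check in the patch (mf8 = 1 .. m8 = 64).

```c++
#include <cstdio>
#include <optional>

// Given the new instruction's SEW and the previous state's SEW/LMUL ratio,
// pick the LMUL (as LMUL * 8) that keeps the ratio unchanged.
// Returns nothing if no legal LMUL between mf8 (1) and m8 (64) fits.
std::optional<unsigned> pickLMULTimes8(unsigned NewSEW, unsigned PrevRatio) {
  unsigned LMULTimes8 = (NewSEW * 8) / PrevRatio;
  if (LMULTimes8 < 1 || LMULTimes8 > 64)
    return std::nullopt;
  return LMULTimes8;
}

int main() {
  // Previous state e8, m1 gives a SEW/LMUL ratio of 8. A following vmv.s.x
  // with SEW=16 can then use LMUL=2 (LMUL*8 == 16) and keep the same ratio,
  // so the second vsetvli can become "vsetvli zero, zero, e16, m2, ...".
  unsigned PrevRatio = (8 * 8) / 8; // SEW=8, LMUL*8=8 -> ratio 8
  if (auto L = pickLMULTimes8(16, PrevRatio))
    std::printf("LMUL*8 = %u (i.e. m2)\n", *L); // prints 16
  return 0;
}
```

This mirrors the first test change below, where a vmv.s.x that previously used e16, mf4 is switched to e16, m2 so it matches the surrounding e8, m1 configuration.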
---
Patch is 88.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69788.diff
17 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+20)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (+53-55)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+107-66)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+26-39)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll (+56-84)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll (+7-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+2-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll (+1-2)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 4c99da1244bf50c..ec42622e18792b6 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1042,6 +1042,26 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
if (!RISCVII::hasVLOp(TSFlags))
return;
+ // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+ // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+ // places.
+ DemandedFields Demanded = getDemanded(MI, MRI);
+ if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
+ PrevInfo.isValid() && !Info.isUnknown() && !PrevInfo.isUnknown() &&
+ !Info.hasSameVLMAX(PrevInfo)) {
+ unsigned SEW = Info.getSEW();
+ // Fixed point value with 3 fractional bits.
+ unsigned NewRatio = (SEW * 8) / PrevInfo.getSEWLMULRatio();
+ if (NewRatio >= 1 && NewRatio <= 64) {
+ bool Fractional = NewRatio < 8;
+ RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
+ Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
+ unsigned VType = Info.encodeVTYPE();
+ Info.setVTYPE(NewVLMul, SEW, RISCVVType::isTailAgnostic(VType),
+ RISCVVType::isMaskAgnostic(VType));
+ }
+ }
+
// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
// VL > 0. We can discard the user requested AVL and just use the last
// one if we can prove it equally zero. This removes a vsetvli entirely
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 25177734325ce15..83edd49bc963767 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -22,9 +22,9 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vadd.vi v12, v11, -16
; CHECK-NEXT: li a0, -256
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
; CHECK-NEXT: vmsne.vi v9, v9, 0
; CHECK-NEXT: vadd.vi v12, v11, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index cbcca9d2696f4ba..3cc7371c1ce9ac4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a2, 6
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e9412019a0dec8e..85939377c6db08e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -567,9 +567,9 @@ define void @buildvec_seq_v9i8(ptr %x) {
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: li a1, 146
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index e6868abdb5b1d71..c95d144a970895c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1101,21 +1101,20 @@ define void @urem_v2i64(ptr %x, ptr %y) {
define void @mulhu_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_v16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, -2044
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: li a1, -128
; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a2, a1, 32
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a2
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: lui a2, %hi(.LCPI65_0)
; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0)
; CHECK-NEXT: vle8.v v11, (a2)
@@ -1126,21 +1125,21 @@ define void @mulhu_v16i8(ptr %x) {
; CHECK-NEXT: vmulhu.vv v8, v8, v10
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: li a2, 513
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a2
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 4
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: addi a1, a1, 78
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 3, v0
; CHECK-NEXT: lui a1, 8
; CHECK-NEXT: addi a1, a1, 304
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 2, v0
; CHECK-NEXT: vsrl.vv v8, v8, v9
; CHECK-NEXT: vse8.v v8, (a0)
@@ -1204,9 +1203,9 @@ define void @mulhu_v6i16(ptr %x) {
; CHECK-NEXT: vdivu.vv v9, v10, v9
; CHECK-NEXT: lui a1, 45217
; CHECK-NEXT: addi a1, a1, -1785
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v11, v10
; CHECK-NEXT: vdivu.vv v8, v8, v11
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -1309,10 +1308,10 @@ define void @mulhs_v16i8(ptr %x) {
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, -1452
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: li a1, 57
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT: vmulhu.vv v8, v8, v9
; CHECK-NEXT: vmv.v.i v9, 7
@@ -1367,9 +1366,9 @@ define void @mulhs_v6i16(ptr %x) {
; CHECK-NEXT: vdiv.vv v9, v9, v10
; CHECK-NEXT: lui a1, 1020016
; CHECK-NEXT: addi a1, a1, 2041
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v11, v10
; CHECK-NEXT: vdiv.vv v8, v8, v11
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -4872,21 +4871,21 @@ define void @mulhu_v32i8(ptr %x) {
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 0
-; LMULMAX2-NEXT: lui a2, 163907
-; LMULMAX2-NEXT: addi a2, a2, -2044
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vmv.s.x v0, a2
-; LMULMAX2-NEXT: li a2, -128
-; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT: vmerge.vxm v12, v10, a2, v0
-; LMULMAX2-NEXT: lui a2, 66049
-; LMULMAX2-NEXT: addi a2, a2, 32
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vmv.s.x v0, a2
-; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT: lui a2, %hi(.LCPI181_0)
-; LMULMAX2-NEXT: addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX2-NEXT: vle8.v v14, (a2)
+; LMULMAX2-NEXT: lui a1, 163907
+; LMULMAX2-NEXT: addi a1, a1, -2044
+; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: li a1, -128
+; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; LMULMAX2-NEXT: vmerge.vxm v12, v10, a1, v0
+; LMULMAX2-NEXT: lui a1, 66049
+; LMULMAX2-NEXT: addi a1, a1, 32
+; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI181_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI181_0)
+; LMULMAX2-NEXT: vle8.v v14, (a1)
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: vsrl.vv v10, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14
@@ -4894,23 +4893,23 @@ define void @mulhu_v32i8(ptr %x) {
; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: vmv.v.i v10, 4
-; LMULMAX2-NEXT: lui a2, 8208
-; LMULMAX2-NEXT: addi a2, a2, 513
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vmv.s.x v0, a2
-; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: lui a1, 8208
+; LMULMAX2-NEXT: addi a1, a1, 513
+; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-NEXT: lui a2, 66785
-; LMULMAX2-NEXT: addi a2, a2, 78
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vmv.s.x v0, a2
-; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: lui a1, 66785
+; LMULMAX2-NEXT: addi a1, a1, 78
+; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0
-; LMULMAX2-NEXT: lui a2, 529160
-; LMULMAX2-NEXT: addi a2, a2, 304
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vmv.s.x v0, a2
-; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: lui a1, 529160
+; LMULMAX2-NEXT: addi a1, a1, 304
+; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0
; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
@@ -5250,11 +5249,11 @@ define void @mulhs_v32i8(ptr %x) {
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 7
-; LMULMAX2-NEXT: lui a2, 304453
-; LMULMAX2-NEXT: addi a2, a2, -1452
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vmv.s.x v0, a2
-; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: lui a1, 304453
+; LMULMAX2-NEXT: addi a1, a1, -1452
+; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: li a1, -123
; LMULMAX2-NEXT: vmv.v.x v12, a1
@@ -5267,15 +5266,14 @@ define void @mulhs_v32i8(ptr %x) {
;
; LMULMAX1-LABEL: mulhs_v32i8:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX1-NEXT: vle8.v v8, (a0)
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle8.v v9, (a1)
; LMULMAX1-NEXT: lui a2, 5
; LMULMAX1-NEXT: addi a2, a2, -1452
-; LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; LMULMAX1-NEXT: vmv.s.x v0, a2
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v10, -9
; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0
; LMULMAX1-NEXT: vdivu.vv v9, v9, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 6ee0e4525f5ec72..728cf18e1a77d8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -536,15 +536,16 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB9_3
@@ -636,7 +637,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
@@ -728,7 +729,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB12_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -1259,15 +1260,16 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV64ZVE32F-NEXT: .LBB20_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB20_3
@@ -1359,7 +1361,7 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
@@ -1454,7 +1456,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB23_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -1466,8 +1468,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB23_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
@@ -1492,8 +1495,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB23_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -1511,8 +1515,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
@@ -1523,8 +1528,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB23_7
@@ -1545,8 +1551,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB23_11
@@ -1557,8 +1564,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
@@ -1599,7 +1607,7 @@ define <8 x i16> @...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/69788
More information about the llvm-commits mailing list