[llvm] [SelectionDAG][RISCV] Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)) in getNode. (PR #144565)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 10:35:18 PDT 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/144565
>From a97738c8878734060ea6fad9f5e6e737f02d1893 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 17 Jun 2025 09:51:05 -0700
Subject: [PATCH 1/2] [SelectionDAG][RISCV] Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)) in getNode.
This is an alternative to the DAGCombine proposed in #144507.
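
For context, a minimal IR sketch of the pattern the fold corresponds to. This is
illustrative only: the function name and the constants are hypothetical, and whether
this exact scalar function hits the new fold depends on when existing combines turn
the multiplies into ISD::VSCALE nodes relative to when the add is built. The cases
changed in the tests below come from lowering and type legalization computing
subvector offsets, where both operands are already VSCALE nodes when the add is
created.

  declare i64 @llvm.vscale.i64()

  ; Hypothetical example of the folded pattern:
  ;   (add (vscale * 2), (vscale * 6)) -> (vscale * 8)
  define i64 @vscale_offset_sum() {
    %vs  = call i64 @llvm.vscale.i64()
    %a   = mul i64 %vs, 2   ; becomes (vscale * 2) in the DAG
    %b   = mul i64 %vs, 6   ; becomes (vscale * 6) in the DAG
    %sum = add i64 %a, %b   ; with both operands as VSCALE nodes,
                            ; getNode now returns (vscale * 8)
    ret i64 %sum
  }

In the RISC-V tests this mostly shows up as an add aX, aY, aY feeding a vsetvli
being dropped: the combined element count is either materialized with a single
shift of vlenb or covered by a VLMAX vsetvli.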
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 +
.../CodeGen/RISCV/rvv/extract-subvector.ll | 4 -
.../CodeGen/RISCV/rvv/insert-subvector.ll | 46 +-
llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 283 +++---
llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 9 +-
llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll | 3 +-
.../RISCV/rvv/undef-earlyclobber-chain.ll | 2 +-
.../RISCV/rvv/vector-deinterleave-fixed.ll | 194 ++---
.../CodeGen/RISCV/rvv/vector-deinterleave.ll | 18 -
.../RISCV/rvv/vector-interleave-store.ll | 9 +-
.../CodeGen/RISCV/rvv/vector-interleave.ll | 817 ++++++++----------
llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll | 12 +-
.../RISCV/rvv/vp-vector-interleaved-access.ll | 100 +--
14 files changed, 694 insertions(+), 821 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..b50d8fe72d5cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7377,6 +7377,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
VT.getScalarType() == MVT::i1)
return getNode(ISD::XOR, DL, VT, N1, N2);
+ if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE &&
+ N2.getOpcode() == ISD::VSCALE) {
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
+ const APInt &C2 = N2->getConstantOperandAPInt(0);
+ return getVScale(DL, VT, C1 + C2);
+ }
break;
case ISD::MUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 83637e4a71d45..0d288c0426a5e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -490,8 +490,6 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v13, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
@@ -545,8 +543,6 @@ define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v13, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index ca9cec921b3cd..e6f3c56956f72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -80,10 +80,9 @@ define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
ret <vscale x 4 x i8> %v
@@ -246,8 +245,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -282,8 +280,8 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vsc
; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
@@ -296,8 +294,9 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vsc
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
@@ -309,10 +308,10 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vsc
; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: ret
@@ -363,8 +362,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
@@ -376,8 +374,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)
@@ -397,8 +394,9 @@ define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_26(<vscale x 1 x half
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v8, a0
; CHECK-NEXT: ret
@@ -422,8 +420,8 @@ define <vscale x 32 x i1> @insert_nxv32i1_nxv8i1_8(<vscale x 32 x i1> %v, <vscal
; CHECK-LABEL: insert_nxv32i1_nxv8i1_8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v0, v8, a0
; CHECK-NEXT: ret
@@ -462,10 +460,11 @@ define <vscale x 4 x i1> @insert_nxv4i1_nxv1i1_2(<vscale x 4 x i1> %v, <vscale x
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v9, v8, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
@@ -570,8 +569,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_2(<vscale x 32 x bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 2)
@@ -583,8 +581,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_26(<vscale x 32 x bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 26)
@@ -604,8 +601,9 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_undef_nxv1bf16_26(<vscale x 1 x
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v8, a0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 28b27bb75f210..9972df97ad9f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1371,6 +1371,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
@@ -1378,9 +1380,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv8r.v v0, v16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
@@ -1406,6 +1407,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: mv t1, t0
; CHECK-NEXT: slli t0, t0, 2
+; CHECK-NEXT: add t1, t1, t0
+; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: add t0, t0, t1
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: addi t0, t0, 16
@@ -1413,9 +1416,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vslidedown.vx v16, v8, a1
; CHECK-NEXT: vl8re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add t0, t0, a0
+; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add a0, a0, t0
; CHECK-NEXT: add a0, sp, a0
@@ -1445,10 +1447,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v5, v8, v16, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a6, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
@@ -1457,85 +1455,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v6, v24, v16, v0.t
-; CHECK-NEXT: add a0, a3, a3
+; CHECK-NEXT: vmfeq.vv v7, v24, v16, v0.t
; CHECK-NEXT: bltu a2, a5, .LBB85_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a5
; CHECK-NEXT: .LBB85_4:
-; CHECK-NEXT: sub a5, a2, a4
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
-; CHECK-NEXT: add a6, a6, a7
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload
-; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v7, a3
-; CHECK-NEXT: sltu a6, a2, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: add a7, a7, a6
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, a6, a7
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: sub a0, a2, a4
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: add a6, a6, a5
+; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 2
+; CHECK-NEXT: add a6, a6, a5
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: add a5, a5, a6
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: sltu a5, a2, a0
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a0, a5, a0
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: mv a6, a5
+; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a5, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v4, v16, v24, v0.t
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v6, v5, a3
+; CHECK-NEXT: vmfeq.vv v10, v16, v24, v0.t
+; CHECK-NEXT: vmv1r.v v9, v7
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v5, a3
; CHECK-NEXT: bltu a2, a4, .LBB85_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a4
; CHECK-NEXT: .LBB85_6:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: add a5, a5, a4
-; CHECK-NEXT: slli a4, a4, 3
-; CHECK-NEXT: add a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a4, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: add a4, a4, a2
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a2, a2, a4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a2, a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v4, a3
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v6, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a3
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: mv a1, a0
@@ -3546,8 +3554,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v16, v24, v8, v0.t
-; ZVFH-NEXT: add a0, a1, a1
-; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; ZVFH-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; ZVFH-NEXT: vslideup.vx v16, v6, a1
; ZVFH-NEXT: vmv.v.v v0, v16
; ZVFH-NEXT: csrr a0, vlenb
@@ -3576,6 +3583,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
@@ -3583,9 +3592,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -3611,6 +3619,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: mv t1, t0
; ZVFHMIN-NEXT: slli t0, t0, 2
+; ZVFHMIN-NEXT: add t1, t1, t0
+; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: add t0, t0, t1
; ZVFHMIN-NEXT: add t0, sp, t0
; ZVFHMIN-NEXT: addi t0, t0, 16
@@ -3618,9 +3628,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vslidedown.vx v16, v8, a1
; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add t0, t0, a0
+; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -3650,10 +3659,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v5, v8, v16, v0.t
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a6, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
@@ -3662,85 +3667,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v16, v0.t
-; ZVFHMIN-NEXT: add a0, a3, a3
+; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v16, v0.t
; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
; ZVFHMIN-NEXT: .LBB171_4:
-; ZVFHMIN-NEXT: sub a5, a2, a4
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
-; ZVFHMIN-NEXT: add a6, a6, a7
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload
-; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
-; ZVFHMIN-NEXT: sltu a6, a2, a5
-; ZVFHMIN-NEXT: addi a6, a6, -1
-; ZVFHMIN-NEXT: and a5, a6, a5
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: add a7, a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 3
-; ZVFHMIN-NEXT: add a6, a6, a7
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: sub a0, a2, a4
; ZVFHMIN-NEXT: csrr a5, vlenb
-; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 1
-; ZVFHMIN-NEXT: add a6, a6, a5
+; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 2
+; ZVFHMIN-NEXT: add a6, a6, a5
+; ZVFHMIN-NEXT: slli a5, a5, 1
+; ZVFHMIN-NEXT: add a5, a5, a6
+; ZVFHMIN-NEXT: add a5, sp, a5
+; ZVFHMIN-NEXT: addi a5, a5, 16
+; ZVFHMIN-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT: sltu a5, a2, a0
+; ZVFHMIN-NEXT: addi a5, a5, -1
+; ZVFHMIN-NEXT: and a0, a5, a0
+; ZVFHMIN-NEXT: csrr a5, vlenb
+; ZVFHMIN-NEXT: slli a5, a5, 1
+; ZVFHMIN-NEXT: mv a6, a5
+; ZVFHMIN-NEXT: slli a5, a5, 3
; ZVFHMIN-NEXT: add a5, a5, a6
; ZVFHMIN-NEXT: add a5, sp, a5
; ZVFHMIN-NEXT: addi a5, a5, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a5, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v6, v5, a3
+; ZVFHMIN-NEXT: vmfeq.vv v10, v16, v24, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v9, v7
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v9, v5, a3
; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a2, a4
; ZVFHMIN-NEXT: .LBB171_6:
-; ZVFHMIN-NEXT: vmv1r.v v0, v7
-; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: mv a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a5, a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a4, a4, a5
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a4, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, a0, a4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: mv a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 1
-; ZVFHMIN-NEXT: add a4, a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 2
-; ZVFHMIN-NEXT: add a2, a2, a4
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v4, a3
-; ZVFHMIN-NEXT: add a0, a1, a1
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v10, a3
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v9, a1
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: mv a1, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index ae868fed68cab..ff923efe8eb43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -4280,8 +4280,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV32-NEXT: vmfeq.vf v24, v16, fa5
; RV32-NEXT: vmfeq.vf v0, v8, fa5
; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: add a1, a0, a0
-; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT: vslideup.vx v0, v24, a0
; RV32-NEXT: ret
;
@@ -4293,8 +4292,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV64-NEXT: vmfeq.vf v24, v16, fa5
; RV64-NEXT: vmfeq.vf v0, v8, fa5
; RV64-NEXT: srli a0, a0, 3
-; RV64-NEXT: add a1, a0, a0
-; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vslideup.vx v0, v24, a0
; RV64-NEXT: ret
;
@@ -4306,8 +4304,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5
; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5
; ZVFHMIN32-NEXT: srli a0, a0, 3
-; ZVFHMIN32-NEXT: add a1, a0, a0
-; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN32-NEXT: ret
;
@@ -4319,8 +4316,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5
; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5
; ZVFHMIN64-NEXT: srli a0, a0, 3
-; ZVFHMIN64-NEXT: add a1, a0, a0
-; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN64-NEXT: ret
%vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index ef560a7631dee..13c63d9c80a9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -2246,8 +2246,7 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v6, a1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
@@ -2283,8 +2282,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b,
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT: add a0, a2, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2316,8 +2314,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT: add a0, a2, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
index bd3c29b0c6efc..a85b471530cc9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -3001,9 +3001,8 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vx v0, v24, a0
; CHECK-NEXT: ret
%vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
index c9f9a79733003..790cd56ee952c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
@@ -48,10 +48,10 @@ define internal void @SubRegLivenessUndefInPhi(i64 %cond) {
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vadd.vi v10, v9, 1
; CHECK-NEXT: vadd.vi v11, v9, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: vslideup.vx v12, v10, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index ca7f2563e4fc9..a42b2b8c4c6ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -191,8 +191,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
@@ -222,8 +221,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave4_v2i32_
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: addi a0, sp, 16
@@ -254,15 +252,13 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle
; CHECK-NEXT: vslidedown.vi v14, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v13, v12, a1
-; CHECK-NEXT: vslideup.vx v8, v14, a1
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v13, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v13, v12, a0
+; CHECK-NEXT: vslideup.vx v8, v14, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v13, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -292,16 +288,14 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vecto
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vslidedown.vi v12, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v15, v14, a1
-; CHECK-NEXT: vslideup.vx v8, v16, a1
-; CHECK-NEXT: vslideup.vx v12, v10, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v15, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v15, v14, a0
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: vslideup.vx v12, v10, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v15, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v12
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -330,24 +324,20 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v
; CHECK-NEXT: vslidedown.vi v12, v8, 2
; CHECK-NEXT: vslidedown.vi v13, v8, 4
; CHECK-NEXT: vslidedown.vi v14, v8, 6
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a2, a0, 2
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a2, a0, 3
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: add a4, a2, a1
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vslideup.vx v8, v12, a1
-; CHECK-NEXT: slli a3, a1, 1
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v11, a2
-; CHECK-NEXT: vslideup.vx v8, v13, a2
-; CHECK-NEXT: add a2, a0, a0
-; CHECK-NEXT: add a3, a3, a1
-; CHECK-NEXT: add a1, a3, a1
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v14, a3
-; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v10, v9, a2
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vslideup.vx v8, v12, a2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v10, v11, a1
+; CHECK-NEXT: vslideup.vx v8, v13, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v14, a2
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0)
@@ -376,25 +366,21 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2
; CHECK-NEXT: vslidedown.vi v13, v8, 2
; CHECK-NEXT: vslidedown.vi v14, v8, 4
; CHECK-NEXT: vslidedown.vi v15, v8, 6
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a2, a0, 2
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a2, a0, 3
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: add a4, a2, a1
-; CHECK-NEXT: slli a5, a1, 1
-; CHECK-NEXT: add a6, a0, a0
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: vslideup.vx v8, v13, a1
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v11, a2
-; CHECK-NEXT: add a1, a5, a1
-; CHECK-NEXT: vslideup.vx v8, v14, a2
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a5
-; CHECK-NEXT: vslideup.vx v8, v15, a5
-; CHECK-NEXT: vsetvli zero, a6, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v10, v9, a2
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vslideup.vx v8, v13, a2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v10, v11, a1
+; CHECK-NEXT: vslideup.vx v8, v14, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v12, a2
+; CHECK-NEXT: vslideup.vx v8, v15, a2
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0)
@@ -555,8 +541,7 @@ define {<2 x float>, <2 x float>, <2 x float>} @vector_deinterleave3_v6f32_v2f32
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
@@ -590,8 +575,7 @@ define {<2 x float>, <2 x float>, <2 x float>, <2 x float>} @vector_deinterleave
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: addi a0, sp, 16
@@ -626,15 +610,13 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein
; CHECK-NEXT: vslidedown.vi v14, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v13, v12, a1
-; CHECK-NEXT: vslideup.vx v8, v14, a1
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v13, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v13, v12, a0
+; CHECK-NEXT: vslideup.vx v8, v14, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v13, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -668,16 +650,14 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>}
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vslidedown.vi v12, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v15, v14, a1
-; CHECK-NEXT: vslideup.vx v8, v16, a1
-; CHECK-NEXT: vslideup.vx v12, v10, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v15, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v15, v14, a0
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: vslideup.vx v12, v10, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v15, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v12
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -711,21 +691,18 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>,
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vslidedown.vi v13, v8, 5
; CHECK-NEXT: vslidedown.vi v14, v8, 6
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v9, a1
-; CHECK-NEXT: vslideup.vx v10, v12, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v11, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v9, a0
+; CHECK-NEXT: vslideup.vx v10, v12, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v11, a1
; CHECK-NEXT: vslidedown.vi v11, v8, 4
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v13, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v14, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v13, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v14, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs2r.v v10, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
@@ -755,25 +732,22 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>,
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vslidedown.vi v11, v8, 6
; CHECK-NEXT: vslidedown.vi v12, v8, 5
-; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v10, a1
-; CHECK-NEXT: vslideup.vx v9, v12, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v10, a0
+; CHECK-NEXT: vslideup.vx v9, v12, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v11, a1
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vslidedown.vi v11, v8, 2
; CHECK-NEXT: vslidedown.vi v12, v8, 1
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v10, a1
-; CHECK-NEXT: vslideup.vx v8, v12, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v11, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v10, a0
+; CHECK-NEXT: vslideup.vx v8, v12, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 6a08f5a28a295..45ffd8452280d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -2712,16 +2712,10 @@ define {<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>, <vscale x
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
@@ -2808,16 +2802,10 @@ define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vs
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
@@ -2904,16 +2892,10 @@ define {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscal
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 3751967f18aa4..a5811e697634a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -14,18 +14,17 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: li a1, -1
-; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: vwmaccu.vx v8, a1, v12
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmsne.vi v12, v10, 0
; CHECK-NEXT: vmsne.vi v10, v8, 0
-; CHECK-NEXT: add a1, a2, a2
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v12, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vsm.v v10, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index e297e88c71f1b..01cc5c58b24ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -17,18 +17,17 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; V-NEXT: vmv1r.v v0, v8
; V-NEXT: vmv.v.i v10, 0
; V-NEXT: li a0, -1
-; V-NEXT: csrr a1, vlenb
; V-NEXT: vmerge.vim v12, v10, 1, v0
; V-NEXT: vmv1r.v v0, v9
; V-NEXT: vmerge.vim v14, v10, 1, v0
-; V-NEXT: srli a1, a1, 2
; V-NEXT: vwaddu.vv v8, v14, v12
; V-NEXT: vwmaccu.vx v8, a0, v12
+; V-NEXT: csrr a0, vlenb
; V-NEXT: vmsne.vi v12, v10, 0
; V-NEXT: vmsne.vi v0, v8, 0
-; V-NEXT: add a0, a1, a1
-; V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; V-NEXT: vslideup.vx v0, v12, a1
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; V-NEXT: vslideup.vx v0, v12, a0
; V-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -38,17 +37,16 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmv.v.i v10, 0
; ZVBB-NEXT: li a0, 1
-; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0
-; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vwsll.vi v12, v10, 8
; ZVBB-NEXT: vmv1r.v v0, v9
; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t
+; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: vmsne.vi v8, v14, 0
; ZVBB-NEXT: vmsne.vi v0, v12, 0
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v8, a1
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v8, a0
; ZVBB-NEXT: ret
;
; ZIP-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -61,13 +59,12 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZIP-NEXT: vmerge.vim v12, v10, 1, v0
; ZIP-NEXT: vmv1r.v v0, v9
; ZIP-NEXT: vmerge.vim v8, v10, 1, v0
-; ZIP-NEXT: srli a0, a0, 2
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v14, v8, v12
; ZIP-NEXT: vmsne.vi v8, v10, 0
; ZIP-NEXT: vmsne.vi v0, v14, 0
-; ZIP-NEXT: add a1, a0, a0
-; ZIP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZIP-NEXT: vslideup.vx v0, v8, a0
; ZIP-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -508,19 +505,17 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsseg3e8.v v14, (a0)
; CHECK-NEXT: vl2r.v v8, (a2)
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vl2r.v v10, (a3)
; CHECK-NEXT: vl2r.v v12, (a0)
-; CHECK-NEXT: add a0, a2, a2
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vmsne.vi v14, v8, 0
; CHECK-NEXT: vmsne.vi v8, v10, 0
; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v8, a2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v14, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v8, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v14, a2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 6
; CHECK-NEXT: mul a0, a0, a1
@@ -551,19 +546,17 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsseg3e8.v v14, (a0)
; ZVBB-NEXT: vl2r.v v8, (a2)
-; ZVBB-NEXT: srli a2, a1, 2
-; ZVBB-NEXT: srli a1, a1, 1
+; ZVBB-NEXT: srli a2, a1, 1
; ZVBB-NEXT: vl2r.v v10, (a3)
; ZVBB-NEXT: vl2r.v v12, (a0)
-; ZVBB-NEXT: add a0, a2, a2
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vmsne.vi v14, v8, 0
; ZVBB-NEXT: vmsne.vi v8, v10, 0
; ZVBB-NEXT: vmsne.vi v0, v12, 0
-; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v8, a2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v14, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v8, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v14, a2
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: li a1, 6
; ZVBB-NEXT: mul a0, a0, a1
@@ -812,22 +805,20 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: add a2, a4, a2
; CHECK-NEXT: vsseg4e8.v v14, (a0)
; CHECK-NEXT: vl2r.v v8, (a2)
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vl2r.v v10, (a4)
-; CHECK-NEXT: add a4, a2, a2
; CHECK-NEXT: vl2r.v v12, (a3)
; CHECK-NEXT: vl2r.v v14, (a0)
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmsne.vi v8, v10, 0
; CHECK-NEXT: vmsne.vi v9, v12, 0
; CHECK-NEXT: vmsne.vi v0, v14, 0
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a2
-; CHECK-NEXT: vslideup.vx v0, v9, a2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v8, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: vslideup.vx v0, v9, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v8, a2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
@@ -859,22 +850,20 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: add a2, a4, a2
; ZVBB-NEXT: vsseg4e8.v v14, (a0)
; ZVBB-NEXT: vl2r.v v8, (a2)
-; ZVBB-NEXT: srli a2, a1, 2
-; ZVBB-NEXT: srli a1, a1, 1
+; ZVBB-NEXT: srli a2, a1, 1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vl2r.v v10, (a4)
-; ZVBB-NEXT: add a4, a2, a2
; ZVBB-NEXT: vl2r.v v12, (a3)
; ZVBB-NEXT: vl2r.v v14, (a0)
; ZVBB-NEXT: vmsne.vi v16, v8, 0
; ZVBB-NEXT: vmsne.vi v8, v10, 0
; ZVBB-NEXT: vmsne.vi v9, v12, 0
; ZVBB-NEXT: vmsne.vi v0, v14, 0
-; ZVBB-NEXT: vsetvli zero, a4, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v8, v16, a2
-; ZVBB-NEXT: vslideup.vx v0, v9, a2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v8, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v8, v16, a1
+; ZVBB-NEXT: vslideup.vx v0, v9, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v8, a2
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 3
; ZVBB-NEXT: add sp, sp, a0
@@ -1114,7 +1103,7 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: add a2, a4, a1
-; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: srli a3, a1, 1
; CHECK-NEXT: vmv2r.v v20, v14
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
@@ -1144,11 +1133,9 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: add a5, a4, a1
; CHECK-NEXT: vl1r.v v16, (a5)
; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vl1r.v v11, (a2)
-; CHECK-NEXT: add a2, a3, a3
; CHECK-NEXT: vl1r.v v15, (a4)
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vl1r.v v13, (a0)
; CHECK-NEXT: vl1r.v v17, (a5)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -1156,11 +1143,11 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmsne.vi v0, v10, 0
; CHECK-NEXT: vmsne.vi v8, v14, 0
; CHECK-NEXT: vmsne.vi v9, v12, 0
-; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v18, a3
-; CHECK-NEXT: vslideup.vx v9, v8, a3
-; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v18, a1
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a3
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v8, v16, 0
; CHECK-NEXT: csrr a0, vlenb
@@ -1190,7 +1177,7 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmerge.vim v18, v12, 1, v0
; ZVBB-NEXT: add a2, a4, a1
-; ZVBB-NEXT: srli a3, a1, 2
+; ZVBB-NEXT: srli a3, a1, 1
; ZVBB-NEXT: vmv2r.v v20, v14
; ZVBB-NEXT: vmv1r.v v0, v9
; ZVBB-NEXT: vmerge.vim v16, v12, 1, v0
@@ -1220,11 +1207,9 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: add a5, a4, a1
; ZVBB-NEXT: vl1r.v v16, (a5)
; ZVBB-NEXT: add a5, a5, a1
-; ZVBB-NEXT: srli a1, a1, 1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vl1r.v v11, (a2)
-; ZVBB-NEXT: add a2, a3, a3
; ZVBB-NEXT: vl1r.v v15, (a4)
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vl1r.v v13, (a0)
; ZVBB-NEXT: vl1r.v v17, (a5)
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -1232,11 +1217,11 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmsne.vi v0, v10, 0
; ZVBB-NEXT: vmsne.vi v8, v14, 0
; ZVBB-NEXT: vmsne.vi v9, v12, 0
-; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v18, a3
-; ZVBB-NEXT: vslideup.vx v9, v8, a3
-; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v9, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v18, a1
+; ZVBB-NEXT: vslideup.vx v9, v8, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v9, a3
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v8, v16, 0
; ZVBB-NEXT: csrr a0, vlenb
@@ -2340,47 +2325,45 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmv1r.v v17, v9
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vim v24, v20, 1, v0
-; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: addi a4, sp, 16
; CHECK-NEXT: vmv1r.v v18, v25
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vmerge.vim v26, v20, 1, v0
-; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmv1r.v v19, v27
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vim v10, v20, 1, v0
-; CHECK-NEXT: add a3, a0, a2
+; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vmv1r.v v20, v11
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vsseg6e8.v v15, (a0)
; CHECK-NEXT: vmv1r.v v15, v22
-; CHECK-NEXT: add a4, a5, a2
+; CHECK-NEXT: add a5, a4, a1
; CHECK-NEXT: vmv1r.v v16, v8
-; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: srli a3, a1, 1
; CHECK-NEXT: vmv1r.v v17, v24
-; CHECK-NEXT: add a6, a4, a2
+; CHECK-NEXT: add a6, a5, a1
; CHECK-NEXT: vmv1r.v v18, v26
-; CHECK-NEXT: add a7, a3, a2
+; CHECK-NEXT: add a7, a2, a1
; CHECK-NEXT: vmv1r.v v19, v10
-; CHECK-NEXT: vsseg6e8.v v14, (a5)
+; CHECK-NEXT: vsseg6e8.v v14, (a4)
; CHECK-NEXT: vl1r.v v8, (a0)
-; CHECK-NEXT: add a0, a6, a2
+; CHECK-NEXT: add a0, a6, a1
; CHECK-NEXT: vl1r.v v10, (a6)
-; CHECK-NEXT: add a6, a7, a2
-; CHECK-NEXT: vl1r.v v12, (a5)
-; CHECK-NEXT: add a5, a0, a2
+; CHECK-NEXT: add a6, a7, a1
+; CHECK-NEXT: vl1r.v v12, (a4)
+; CHECK-NEXT: add a4, a0, a1
; CHECK-NEXT: vl1r.v v14, (a7)
-; CHECK-NEXT: add a7, a6, a2
-; CHECK-NEXT: vl1r.v v16, (a5)
-; CHECK-NEXT: add a5, a5, a2
+; CHECK-NEXT: add a7, a6, a1
+; CHECK-NEXT: vl1r.v v16, (a4)
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vl1r.v v18, (a7)
-; CHECK-NEXT: add a7, a7, a2
-; CHECK-NEXT: srli a2, a2, 1
-; CHECK-NEXT: vl1r.v v9, (a3)
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: vl1r.v v17, (a5)
-; CHECK-NEXT: add a5, a2, a2
+; CHECK-NEXT: add a7, a7, a1
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vl1r.v v9, (a2)
+; CHECK-NEXT: vl1r.v v17, (a4)
; CHECK-NEXT: vl1r.v v11, (a0)
-; CHECK-NEXT: vl1r.v v13, (a4)
+; CHECK-NEXT: vl1r.v v13, (a5)
; CHECK-NEXT: vl1r.v v19, (a7)
; CHECK-NEXT: vl1r.v v15, (a6)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -2390,12 +2373,12 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: vmsne.vi v10, v18, 0
; CHECK-NEXT: vmsne.vi v8, v14, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v9, v20, a1
; CHECK-NEXT: vslideup.vx v0, v16, a1
-; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a2
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a3
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 12
@@ -2427,47 +2410,45 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmv1r.v v17, v9
; ZVBB-NEXT: vmv1r.v v0, v10
; ZVBB-NEXT: vmerge.vim v24, v20, 1, v0
-; ZVBB-NEXT: addi a5, sp, 16
+; ZVBB-NEXT: addi a4, sp, 16
; ZVBB-NEXT: vmv1r.v v18, v25
; ZVBB-NEXT: vmv1r.v v0, v11
; ZVBB-NEXT: vmerge.vim v26, v20, 1, v0
-; ZVBB-NEXT: csrr a2, vlenb
+; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: vmv1r.v v19, v27
; ZVBB-NEXT: vmv1r.v v0, v12
; ZVBB-NEXT: vmerge.vim v10, v20, 1, v0
-; ZVBB-NEXT: add a3, a0, a2
+; ZVBB-NEXT: add a2, a0, a1
; ZVBB-NEXT: vmv1r.v v20, v11
-; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; ZVBB-NEXT: vsseg6e8.v v15, (a0)
; ZVBB-NEXT: vmv1r.v v15, v22
-; ZVBB-NEXT: add a4, a5, a2
+; ZVBB-NEXT: add a5, a4, a1
; ZVBB-NEXT: vmv1r.v v16, v8
-; ZVBB-NEXT: srli a1, a2, 2
+; ZVBB-NEXT: srli a3, a1, 1
; ZVBB-NEXT: vmv1r.v v17, v24
-; ZVBB-NEXT: add a6, a4, a2
+; ZVBB-NEXT: add a6, a5, a1
; ZVBB-NEXT: vmv1r.v v18, v26
-; ZVBB-NEXT: add a7, a3, a2
+; ZVBB-NEXT: add a7, a2, a1
; ZVBB-NEXT: vmv1r.v v19, v10
-; ZVBB-NEXT: vsseg6e8.v v14, (a5)
+; ZVBB-NEXT: vsseg6e8.v v14, (a4)
; ZVBB-NEXT: vl1r.v v8, (a0)
-; ZVBB-NEXT: add a0, a6, a2
+; ZVBB-NEXT: add a0, a6, a1
; ZVBB-NEXT: vl1r.v v10, (a6)
-; ZVBB-NEXT: add a6, a7, a2
-; ZVBB-NEXT: vl1r.v v12, (a5)
-; ZVBB-NEXT: add a5, a0, a2
+; ZVBB-NEXT: add a6, a7, a1
+; ZVBB-NEXT: vl1r.v v12, (a4)
+; ZVBB-NEXT: add a4, a0, a1
; ZVBB-NEXT: vl1r.v v14, (a7)
-; ZVBB-NEXT: add a7, a6, a2
-; ZVBB-NEXT: vl1r.v v16, (a5)
-; ZVBB-NEXT: add a5, a5, a2
+; ZVBB-NEXT: add a7, a6, a1
+; ZVBB-NEXT: vl1r.v v16, (a4)
+; ZVBB-NEXT: add a4, a4, a1
; ZVBB-NEXT: vl1r.v v18, (a7)
-; ZVBB-NEXT: add a7, a7, a2
-; ZVBB-NEXT: srli a2, a2, 1
-; ZVBB-NEXT: vl1r.v v9, (a3)
-; ZVBB-NEXT: add a3, a1, a1
-; ZVBB-NEXT: vl1r.v v17, (a5)
-; ZVBB-NEXT: add a5, a2, a2
+; ZVBB-NEXT: add a7, a7, a1
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vl1r.v v9, (a2)
+; ZVBB-NEXT: vl1r.v v17, (a4)
; ZVBB-NEXT: vl1r.v v11, (a0)
-; ZVBB-NEXT: vl1r.v v13, (a4)
+; ZVBB-NEXT: vl1r.v v13, (a5)
; ZVBB-NEXT: vl1r.v v19, (a7)
; ZVBB-NEXT: vl1r.v v15, (a6)
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -2477,12 +2458,12 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: vmsne.vi v10, v18, 0
; ZVBB-NEXT: vmsne.vi v8, v14, 0
-; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v20, a1
; ZVBB-NEXT: vslideup.vx v0, v16, a1
-; ZVBB-NEXT: vsetvli zero, a5, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v9, a2
-; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v9, a3
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: li a1, 12
@@ -3676,23 +3657,21 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v14, 0
-; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 3
; CHECK-NEXT: sub a0, a1, a0
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmerge.vim v16, v14, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v22, v14, 1, v0
-; CHECK-NEXT: add a3, a4, a2
-; CHECK-NEXT: srli a1, a2, 2
-; CHECK-NEXT: add a5, a0, a2
+; CHECK-NEXT: add a2, a3, a1
; CHECK-NEXT: vmv4r.v v24, v16
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v18, v14, 1, v0
-; CHECK-NEXT: add a6, a3, a2
+; CHECK-NEXT: add a4, a2, a1
; CHECK-NEXT: vmv1r.v v25, v22
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vim v8, v14, 1, v0
@@ -3704,41 +3683,41 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: vmerge.vim v10, v14, 1, v0
; CHECK-NEXT: vmv1r.v v28, v20
; CHECK-NEXT: vmv1r.v v18, v23
-; CHECK-NEXT: add a7, a6, a2
+; CHECK-NEXT: add a5, a4, a1
; CHECK-NEXT: vmv1r.v v29, v10
; CHECK-NEXT: vmv1r.v v20, v9
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vim v30, v14, 1, v0
; CHECK-NEXT: vmv1r.v v22, v11
-; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vsseg7e8.v v24, (a4)
+; CHECK-NEXT: vsetvli a6, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsseg7e8.v v24, (a3)
; CHECK-NEXT: vmv1r.v v23, v31
; CHECK-NEXT: vsseg7e8.v v17, (a0)
-; CHECK-NEXT: vl1r.v v8, (a6)
-; CHECK-NEXT: add a6, a7, a2
-; CHECK-NEXT: vl1r.v v10, (a4)
-; CHECK-NEXT: add a4, a6, a2
-; CHECK-NEXT: vl1r.v v12, (a6)
-; CHECK-NEXT: add a6, a4, a2
-; CHECK-NEXT: vl1r.v v14, (a6)
-; CHECK-NEXT: add a6, a5, a2
-; CHECK-NEXT: vl1r.v v16, (a5)
-; CHECK-NEXT: add a5, a6, a2
-; CHECK-NEXT: vl1r.v v18, (a5)
-; CHECK-NEXT: add a5, a5, a2
-; CHECK-NEXT: vl1r.v v9, (a7)
-; CHECK-NEXT: add a7, a5, a2
-; CHECK-NEXT: vl1r.v v20, (a7)
-; CHECK-NEXT: add a7, a7, a2
-; CHECK-NEXT: srli a2, a2, 1
-; CHECK-NEXT: vl1r.v v11, (a3)
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: vl1r.v v13, (a4)
-; CHECK-NEXT: add a4, a2, a2
+; CHECK-NEXT: vl1r.v v8, (a4)
+; CHECK-NEXT: add a4, a5, a1
+; CHECK-NEXT: vl1r.v v10, (a3)
+; CHECK-NEXT: add a6, a4, a1
+; CHECK-NEXT: vl1r.v v12, (a4)
+; CHECK-NEXT: add a3, a6, a1
+; CHECK-NEXT: vl1r.v v14, (a3)
+; CHECK-NEXT: srli a3, a1, 1
+; CHECK-NEXT: vl1r.v v9, (a5)
+; CHECK-NEXT: add a4, a0, a1
+; CHECK-NEXT: vl1r.v v16, (a4)
+; CHECK-NEXT: add a4, a4, a1
+; CHECK-NEXT: vl1r.v v11, (a2)
+; CHECK-NEXT: add a2, a4, a1
+; CHECK-NEXT: vl1r.v v18, (a2)
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: vl1r.v v13, (a6)
+; CHECK-NEXT: add a5, a2, a1
+; CHECK-NEXT: vl1r.v v20, (a5)
+; CHECK-NEXT: add a5, a5, a1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vl1r.v v15, (a0)
-; CHECK-NEXT: vl1r.v v19, (a5)
-; CHECK-NEXT: vl1r.v v17, (a6)
-; CHECK-NEXT: vl1r.v v21, (a7)
+; CHECK-NEXT: vl1r.v v19, (a2)
+; CHECK-NEXT: vl1r.v v17, (a4)
+; CHECK-NEXT: vl1r.v v21, (a5)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v22, v8, 0
; CHECK-NEXT: vmsne.vi v0, v10, 0
@@ -3747,13 +3726,13 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: vmsne.vi v11, v18, 0
; CHECK-NEXT: vmsne.vi v8, v16, 0
; CHECK-NEXT: vmsne.vi v12, v20, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v22, a1
; CHECK-NEXT: vslideup.vx v9, v10, a1
; CHECK-NEXT: vslideup.vx v8, v11, a1
-; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a2
-; CHECK-NEXT: vslideup.vx v8, v12, a2
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a3
+; CHECK-NEXT: vslideup.vx v8, v12, a3
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 14
; CHECK-NEXT: mul a0, a0, a1
@@ -3770,23 +3749,21 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: sub sp, sp, a0
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmv.v.i v14, 0
-; ZVBB-NEXT: addi a4, sp, 16
+; ZVBB-NEXT: addi a3, sp, 16
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 3
; ZVBB-NEXT: sub a0, a1, a0
; ZVBB-NEXT: add a0, sp, a0
; ZVBB-NEXT: addi a0, a0, 16
-; ZVBB-NEXT: csrr a2, vlenb
+; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: vmerge.vim v16, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0
-; ZVBB-NEXT: add a3, a4, a2
-; ZVBB-NEXT: srli a1, a2, 2
-; ZVBB-NEXT: add a5, a0, a2
+; ZVBB-NEXT: add a2, a3, a1
; ZVBB-NEXT: vmv4r.v v24, v16
; ZVBB-NEXT: vmv1r.v v0, v9
; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0
-; ZVBB-NEXT: add a6, a3, a2
+; ZVBB-NEXT: add a4, a2, a1
; ZVBB-NEXT: vmv1r.v v25, v22
; ZVBB-NEXT: vmv1r.v v0, v10
; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0
@@ -3798,41 +3775,41 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: vmerge.vim v10, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v28, v20
; ZVBB-NEXT: vmv1r.v v18, v23
-; ZVBB-NEXT: add a7, a6, a2
+; ZVBB-NEXT: add a5, a4, a1
; ZVBB-NEXT: vmv1r.v v29, v10
; ZVBB-NEXT: vmv1r.v v20, v9
; ZVBB-NEXT: vmv1r.v v0, v13
; ZVBB-NEXT: vmerge.vim v30, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v22, v11
-; ZVBB-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; ZVBB-NEXT: vsseg7e8.v v24, (a4)
+; ZVBB-NEXT: vsetvli a6, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vsseg7e8.v v24, (a3)
; ZVBB-NEXT: vmv1r.v v23, v31
; ZVBB-NEXT: vsseg7e8.v v17, (a0)
-; ZVBB-NEXT: vl1r.v v8, (a6)
-; ZVBB-NEXT: add a6, a7, a2
-; ZVBB-NEXT: vl1r.v v10, (a4)
-; ZVBB-NEXT: add a4, a6, a2
-; ZVBB-NEXT: vl1r.v v12, (a6)
-; ZVBB-NEXT: add a6, a4, a2
-; ZVBB-NEXT: vl1r.v v14, (a6)
-; ZVBB-NEXT: add a6, a5, a2
-; ZVBB-NEXT: vl1r.v v16, (a5)
-; ZVBB-NEXT: add a5, a6, a2
-; ZVBB-NEXT: vl1r.v v18, (a5)
-; ZVBB-NEXT: add a5, a5, a2
-; ZVBB-NEXT: vl1r.v v9, (a7)
-; ZVBB-NEXT: add a7, a5, a2
-; ZVBB-NEXT: vl1r.v v20, (a7)
-; ZVBB-NEXT: add a7, a7, a2
-; ZVBB-NEXT: srli a2, a2, 1
-; ZVBB-NEXT: vl1r.v v11, (a3)
-; ZVBB-NEXT: add a3, a1, a1
-; ZVBB-NEXT: vl1r.v v13, (a4)
-; ZVBB-NEXT: add a4, a2, a2
+; ZVBB-NEXT: vl1r.v v8, (a4)
+; ZVBB-NEXT: add a4, a5, a1
+; ZVBB-NEXT: vl1r.v v10, (a3)
+; ZVBB-NEXT: add a6, a4, a1
+; ZVBB-NEXT: vl1r.v v12, (a4)
+; ZVBB-NEXT: add a3, a6, a1
+; ZVBB-NEXT: vl1r.v v14, (a3)
+; ZVBB-NEXT: srli a3, a1, 1
+; ZVBB-NEXT: vl1r.v v9, (a5)
+; ZVBB-NEXT: add a4, a0, a1
+; ZVBB-NEXT: vl1r.v v16, (a4)
+; ZVBB-NEXT: add a4, a4, a1
+; ZVBB-NEXT: vl1r.v v11, (a2)
+; ZVBB-NEXT: add a2, a4, a1
+; ZVBB-NEXT: vl1r.v v18, (a2)
+; ZVBB-NEXT: add a2, a2, a1
+; ZVBB-NEXT: vl1r.v v13, (a6)
+; ZVBB-NEXT: add a5, a2, a1
+; ZVBB-NEXT: vl1r.v v20, (a5)
+; ZVBB-NEXT: add a5, a5, a1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vl1r.v v15, (a0)
-; ZVBB-NEXT: vl1r.v v19, (a5)
-; ZVBB-NEXT: vl1r.v v17, (a6)
-; ZVBB-NEXT: vl1r.v v21, (a7)
+; ZVBB-NEXT: vl1r.v v19, (a2)
+; ZVBB-NEXT: vl1r.v v17, (a4)
+; ZVBB-NEXT: vl1r.v v21, (a5)
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v22, v8, 0
; ZVBB-NEXT: vmsne.vi v0, v10, 0
@@ -3841,13 +3818,13 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: vmsne.vi v11, v18, 0
; ZVBB-NEXT: vmsne.vi v8, v16, 0
; ZVBB-NEXT: vmsne.vi v12, v20, 0
-; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v0, v22, a1
; ZVBB-NEXT: vslideup.vx v9, v10, a1
; ZVBB-NEXT: vslideup.vx v8, v11, a1
-; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v9, a2
-; ZVBB-NEXT: vslideup.vx v8, v12, a2
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v9, a3
+; ZVBB-NEXT: vslideup.vx v8, v12, a3
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: li a1, 14
; ZVBB-NEXT: mul a0, a0, a1
@@ -5569,54 +5546,52 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: add a6, a4, a0
; CHECK-NEXT: add a7, a5, a0
; CHECK-NEXT: add t0, a6, a0
-; CHECK-NEXT: add t1, a7, a0
-; CHECK-NEXT: add t2, t0, a0
; CHECK-NEXT: vmv1r.v v20, v9
-; CHECK-NEXT: add t3, t1, a0
+; CHECK-NEXT: add t1, a7, a0
; CHECK-NEXT: vmv1r.v v22, v11
; CHECK-NEXT: vsseg8e8.v v16, (a1)
-; CHECK-NEXT: vl1r.v v10, (t1)
-; CHECK-NEXT: add t1, t2, a0
-; CHECK-NEXT: vl1r.v v12, (a5)
-; CHECK-NEXT: add a5, t3, a0
+; CHECK-NEXT: vl1r.v v8, (a5)
+; CHECK-NEXT: add a5, t0, a0
+; CHECK-NEXT: vl1r.v v12, (t1)
+; CHECK-NEXT: add t1, t1, a0
; CHECK-NEXT: vl1r.v v14, (a2)
-; CHECK-NEXT: add a2, t1, a0
+; CHECK-NEXT: add a2, a5, a0
+; CHECK-NEXT: vl1r.v v10, (a5)
+; CHECK-NEXT: add a5, t1, a0
; CHECK-NEXT: vl1r.v v16, (a5)
; CHECK-NEXT: add a5, a5, a0
-; CHECK-NEXT: vl1r.v v8, (a2)
-; CHECK-NEXT: add a2, a2, a0
-; CHECK-NEXT: vl1r.v v18, (t2)
; CHECK-NEXT: vl1r.v v17, (a5)
-; CHECK-NEXT: vl1r.v v11, (t3)
-; CHECK-NEXT: vl1r.v v13, (a7)
+; CHECK-NEXT: add a5, a2, a0
+; CHECK-NEXT: vl1r.v v18, (a5)
+; CHECK-NEXT: add a5, a5, a0
+; CHECK-NEXT: vl1r.v v13, (t1)
+; CHECK-NEXT: vl1r.v v9, (a7)
; CHECK-NEXT: vl1r.v v15, (a3)
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v20, v16, 0
-; CHECK-NEXT: vmsne.vi v16, v10, 0
-; CHECK-NEXT: vl1r.v v10, (a6)
-; CHECK-NEXT: vmsne.vi v17, v12, 0
+; CHECK-NEXT: vmsne.vi v16, v12, 0
+; CHECK-NEXT: vl1r.v v12, (a6)
+; CHECK-NEXT: vmsne.vi v17, v8, 0
; CHECK-NEXT: vmsne.vi v0, v14, 0
-; CHECK-NEXT: vl1r.v v12, (a1)
-; CHECK-NEXT: vl1r.v v9, (a2)
-; CHECK-NEXT: vl1r.v v19, (t1)
-; CHECK-NEXT: vl1r.v v11, (t0)
-; CHECK-NEXT: vl1r.v v13, (a4)
-; CHECK-NEXT: vmsne.vi v14, v8, 0
+; CHECK-NEXT: vl1r.v v14, (a1)
+; CHECK-NEXT: vl1r.v v19, (a5)
+; CHECK-NEXT: vl1r.v v11, (a2)
+; CHECK-NEXT: vl1r.v v13, (t0)
+; CHECK-NEXT: vl1r.v v15, (a4)
; CHECK-NEXT: vmsne.vi v9, v18, 0
-; CHECK-NEXT: vmsne.vi v15, v10, 0
-; CHECK-NEXT: vmsne.vi v8, v12, 0
+; CHECK-NEXT: vmsne.vi v18, v10, 0
+; CHECK-NEXT: vmsne.vi v10, v12, 0
+; CHECK-NEXT: vmsne.vi v8, v14, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v20, a1
; CHECK-NEXT: vslideup.vx v0, v17, a1
-; CHECK-NEXT: vslideup.vx v9, v14, a1
-; CHECK-NEXT: vslideup.vx v8, v15, a1
+; CHECK-NEXT: vslideup.vx v18, v9, a1
+; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v0, v16, a0
-; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: vslideup.vx v8, v18, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
@@ -5670,54 +5645,52 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: add a6, a4, a0
; ZVBB-NEXT: add a7, a5, a0
; ZVBB-NEXT: add t0, a6, a0
-; ZVBB-NEXT: add t1, a7, a0
-; ZVBB-NEXT: add t2, t0, a0
; ZVBB-NEXT: vmv1r.v v20, v9
-; ZVBB-NEXT: add t3, t1, a0
+; ZVBB-NEXT: add t1, a7, a0
; ZVBB-NEXT: vmv1r.v v22, v11
; ZVBB-NEXT: vsseg8e8.v v16, (a1)
-; ZVBB-NEXT: vl1r.v v10, (t1)
-; ZVBB-NEXT: add t1, t2, a0
-; ZVBB-NEXT: vl1r.v v12, (a5)
-; ZVBB-NEXT: add a5, t3, a0
+; ZVBB-NEXT: vl1r.v v8, (a5)
+; ZVBB-NEXT: add a5, t0, a0
+; ZVBB-NEXT: vl1r.v v12, (t1)
+; ZVBB-NEXT: add t1, t1, a0
; ZVBB-NEXT: vl1r.v v14, (a2)
-; ZVBB-NEXT: add a2, t1, a0
+; ZVBB-NEXT: add a2, a5, a0
+; ZVBB-NEXT: vl1r.v v10, (a5)
+; ZVBB-NEXT: add a5, t1, a0
; ZVBB-NEXT: vl1r.v v16, (a5)
; ZVBB-NEXT: add a5, a5, a0
-; ZVBB-NEXT: vl1r.v v8, (a2)
-; ZVBB-NEXT: add a2, a2, a0
-; ZVBB-NEXT: vl1r.v v18, (t2)
; ZVBB-NEXT: vl1r.v v17, (a5)
-; ZVBB-NEXT: vl1r.v v11, (t3)
-; ZVBB-NEXT: vl1r.v v13, (a7)
+; ZVBB-NEXT: add a5, a2, a0
+; ZVBB-NEXT: vl1r.v v18, (a5)
+; ZVBB-NEXT: add a5, a5, a0
+; ZVBB-NEXT: vl1r.v v13, (t1)
+; ZVBB-NEXT: vl1r.v v9, (a7)
; ZVBB-NEXT: vl1r.v v15, (a3)
; ZVBB-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v20, v16, 0
-; ZVBB-NEXT: vmsne.vi v16, v10, 0
-; ZVBB-NEXT: vl1r.v v10, (a6)
-; ZVBB-NEXT: vmsne.vi v17, v12, 0
+; ZVBB-NEXT: vmsne.vi v16, v12, 0
+; ZVBB-NEXT: vl1r.v v12, (a6)
+; ZVBB-NEXT: vmsne.vi v17, v8, 0
; ZVBB-NEXT: vmsne.vi v0, v14, 0
-; ZVBB-NEXT: vl1r.v v12, (a1)
-; ZVBB-NEXT: vl1r.v v9, (a2)
-; ZVBB-NEXT: vl1r.v v19, (t1)
-; ZVBB-NEXT: vl1r.v v11, (t0)
-; ZVBB-NEXT: vl1r.v v13, (a4)
-; ZVBB-NEXT: vmsne.vi v14, v8, 0
+; ZVBB-NEXT: vl1r.v v14, (a1)
+; ZVBB-NEXT: vl1r.v v19, (a5)
+; ZVBB-NEXT: vl1r.v v11, (a2)
+; ZVBB-NEXT: vl1r.v v13, (t0)
+; ZVBB-NEXT: vl1r.v v15, (a4)
; ZVBB-NEXT: vmsne.vi v9, v18, 0
-; ZVBB-NEXT: vmsne.vi v15, v10, 0
-; ZVBB-NEXT: vmsne.vi v8, v12, 0
+; ZVBB-NEXT: vmsne.vi v18, v10, 0
+; ZVBB-NEXT: vmsne.vi v10, v12, 0
+; ZVBB-NEXT: vmsne.vi v8, v14, 0
; ZVBB-NEXT: srli a1, a0, 2
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v16, v20, a1
; ZVBB-NEXT: vslideup.vx v0, v17, a1
-; ZVBB-NEXT: vslideup.vx v9, v14, a1
-; ZVBB-NEXT: vslideup.vx v8, v15, a1
+; ZVBB-NEXT: vslideup.vx v18, v9, a1
+; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: srli a0, a0, 1
-; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v0, v16, a0
-; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: vslideup.vx v8, v18, a0
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 4
; ZVBB-NEXT: add sp, sp, a0
@@ -6294,14 +6267,12 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; V-NEXT: vwaddu.vv v10, v8, v9
; V-NEXT: li a0, -1
-; V-NEXT: csrr a1, vlenb
; V-NEXT: vwmaccu.vx v10, a0, v9
-; V-NEXT: srli a1, a1, 2
-; V-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; V-NEXT: vslidedown.vx v8, v10, a1
-; V-NEXT: add a0, a1, a1
-; V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; V-NEXT: vslideup.vx v10, v8, a1
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v8, v10, a0
+; V-NEXT: vslideup.vx v10, v8, a0
; V-NEXT: vmv.v.v v8, v10
; V-NEXT: ret
;
@@ -6314,8 +6285,6 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
-; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a0
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
@@ -6327,8 +6296,7 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: csrr a0, vlenb
; ZIP-NEXT: srli a0, a0, 2
-; ZIP-NEXT: add a1, a0, a0
-; ZIP-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZIP-NEXT: vslideup.vx v10, v11, a0
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
@@ -6374,14 +6342,12 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; V-NEXT: vwaddu.vv v10, v8, v9
; V-NEXT: li a0, -1
-; V-NEXT: csrr a1, vlenb
; V-NEXT: vwmaccu.vx v10, a0, v9
-; V-NEXT: srli a1, a1, 2
-; V-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; V-NEXT: vslidedown.vx v8, v10, a1
-; V-NEXT: add a0, a1, a1
-; V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; V-NEXT: vslideup.vx v10, v8, a1
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v8, v10, a0
+; V-NEXT: vslideup.vx v10, v8, a0
; V-NEXT: vmv.v.v v8, v10
; V-NEXT: ret
;
@@ -6394,8 +6360,6 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
-; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a0
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
@@ -6407,8 +6371,7 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: csrr a0, vlenb
; ZIP-NEXT: srli a0, a0, 2
-; ZIP-NEXT: add a1, a0, a0
-; ZIP-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZIP-NEXT: vslideup.vx v10, v11, a0
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
@@ -6807,8 +6770,7 @@ define <vscale x 6 x half> @vector_interleave_nxv6f16_nxv2f16(<vscale x 2 x half
; CHECK-NEXT: vle16.v v9, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -6834,8 +6796,7 @@ define <vscale x 6 x half> @vector_interleave_nxv6f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: vle16.v v9, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v9, a1
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -6967,8 +6928,7 @@ define <vscale x 6 x bfloat> @vector_interleave_nxv6bf16_nxv2bf16(<vscale x 2 x
; CHECK-NEXT: vle16.v v9, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -6994,8 +6954,7 @@ define <vscale x 6 x bfloat> @vector_interleave_nxv6bf16_nxv2bf16(<vscale x 2 x
; ZVBB-NEXT: vle16.v v9, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v9, a1
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -7127,8 +7086,7 @@ define <vscale x 3 x float> @vector_interleave_nxv3f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v9, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -7154,8 +7112,7 @@ define <vscale x 3 x float> @vector_interleave_nxv3f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v9, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v9, a1
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -7391,13 +7348,12 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv2f16(<vscale x 2 x half
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
@@ -7422,13 +7378,12 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
@@ -7559,13 +7514,12 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv2bf16(<vscale x 2 x
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
@@ -7590,13 +7544,12 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv2bf16(<vscale x 2 x
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
@@ -7727,13 +7680,12 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v9, (a4)
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v10, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
@@ -7758,13 +7710,12 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v9, (a4)
; ZVBB-NEXT: vle32.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v10, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
@@ -7998,13 +7949,12 @@ define <vscale x 10 x half> @vector_interleave_nxv10f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: vle16.v v8, (a5)
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vle16.v v10, (a3)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: add a2, a5, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8034,13 +7984,12 @@ define <vscale x 10 x half> @vector_interleave_nxv10f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: vle16.v v8, (a5)
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vle16.v v10, (a3)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
; ZVBB-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: add a2, a5, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8466,13 +8415,12 @@ define <vscale x 10 x bfloat> @vector_interleave_nxv10bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: vle16.v v8, (a5)
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vle16.v v10, (a3)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: add a2, a5, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8502,13 +8450,12 @@ define <vscale x 10 x bfloat> @vector_interleave_nxv10bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: vle16.v v8, (a5)
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vle16.v v10, (a3)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
; ZVBB-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: add a2, a5, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8934,13 +8881,12 @@ define <vscale x 5 x float> @vector_interleave_nxv5f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v8, (a5)
; CHECK-NEXT: vle32.v v9, (a4)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vle32.v v10, (a3)
-; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: add a2, a5, a2
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -8970,13 +8916,12 @@ define <vscale x 5 x float> @vector_interleave_nxv5f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v8, (a5)
; ZVBB-NEXT: vle32.v v9, (a4)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vle32.v v10, (a3)
-; ZVBB-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
; ZVBB-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: add a2, a5, a2
; ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -9796,18 +9741,17 @@ define <vscale x 12 x half> @vector_interleave_nxv12f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v11, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 1
@@ -9836,18 +9780,17 @@ define <vscale x 12 x half> @vector_interleave_nxv12f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v11, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v11, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v11, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 1
@@ -10311,18 +10254,17 @@ define <vscale x 12 x bfloat> @vector_interleave_nxv12bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v11, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 1
@@ -10351,18 +10293,17 @@ define <vscale x 12 x bfloat> @vector_interleave_nxv12bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v11, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v11, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v11, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 1
@@ -10826,18 +10767,17 @@ define <vscale x 6 x float> @vector_interleave_nxv6f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v10, (a6)
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle32.v v11, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
-; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a1
-; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v11, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 1
@@ -10866,18 +10806,17 @@ define <vscale x 6 x float> @vector_interleave_nxv6f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v10, (a6)
; ZVBB-NEXT: vle32.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle32.v v11, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
-; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v11, a1
-; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v11, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v11, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 1
@@ -11761,7 +11700,6 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: add a4, a3, a2
; CHECK-NEXT: add a5, a4, a2
@@ -11771,20 +11709,20 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: add a7, a6, a2
; CHECK-NEXT: vle16.v v8, (a7)
; CHECK-NEXT: vle16.v v10, (a6)
-; CHECK-NEXT: add a6, a1, a1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a2, a7, a2
; CHECK-NEXT: vle16.v v12, (a5)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a2)
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -11801,7 +11739,6 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a3, a0, a2
; ZVBB-NEXT: add a4, a3, a2
; ZVBB-NEXT: add a5, a4, a2
@@ -11811,20 +11748,20 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: add a7, a6, a2
; ZVBB-NEXT: vle16.v v8, (a7)
; ZVBB-NEXT: vle16.v v10, (a6)
-; ZVBB-NEXT: add a6, a1, a1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a2, a7, a2
; ZVBB-NEXT: vle16.v v12, (a5)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a2)
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v12, a1
; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -12325,7 +12262,6 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: add a4, a3, a2
; CHECK-NEXT: add a5, a4, a2
@@ -12335,20 +12271,20 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: add a7, a6, a2
; CHECK-NEXT: vle16.v v8, (a7)
; CHECK-NEXT: vle16.v v10, (a6)
-; CHECK-NEXT: add a6, a1, a1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a2, a7, a2
; CHECK-NEXT: vle16.v v12, (a5)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a2)
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -12365,7 +12301,6 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a3, a0, a2
; ZVBB-NEXT: add a4, a3, a2
; ZVBB-NEXT: add a5, a4, a2
@@ -12375,20 +12310,20 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: add a7, a6, a2
; ZVBB-NEXT: vle16.v v8, (a7)
; ZVBB-NEXT: vle16.v v10, (a6)
-; ZVBB-NEXT: add a6, a1, a1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a2, a7, a2
; ZVBB-NEXT: vle16.v v12, (a5)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a2)
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v12, a1
; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -12889,7 +12824,6 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: add a4, a3, a2
; CHECK-NEXT: add a5, a4, a2
@@ -12899,20 +12833,20 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: add a7, a6, a2
; CHECK-NEXT: vle32.v v8, (a7)
; CHECK-NEXT: vle32.v v10, (a6)
-; CHECK-NEXT: add a6, a1, a1
+; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: add a2, a7, a2
; CHECK-NEXT: vle32.v v12, (a5)
-; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v11, (a2)
; CHECK-NEXT: vle32.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -12929,7 +12863,6 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: srli a1, a1, 3
; ZVBB-NEXT: add a3, a0, a2
; ZVBB-NEXT: add a4, a3, a2
; ZVBB-NEXT: add a5, a4, a2
@@ -12939,20 +12872,20 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: add a7, a6, a2
; ZVBB-NEXT: vle32.v v8, (a7)
; ZVBB-NEXT: vle32.v v10, (a6)
-; ZVBB-NEXT: add a6, a1, a1
+; ZVBB-NEXT: srli a1, a1, 3
; ZVBB-NEXT: add a2, a7, a2
; ZVBB-NEXT: vle32.v v12, (a5)
-; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v11, (a2)
; ZVBB-NEXT: vle32.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v12, a1
; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v12, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -13945,23 +13878,22 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: vle16.v v11, (t0)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v9, (a7)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v8, a1
-; CHECK-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -13990,23 +13922,22 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: vle16.v v11, (t0)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v9, (a7)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v11, v8, a1
-; ZVBB-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v9, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -14243,23 +14174,22 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: vle16.v v11, (t0)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v9, (a7)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v8, a1
-; CHECK-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -14288,23 +14218,22 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: vle16.v v11, (t0)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v9, (a7)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v11, v8, a1
-; ZVBB-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v9, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -14541,23 +14470,22 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v11, (t0)
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle32.v v9, (a7)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v8, a1
-; CHECK-NEXT: vsetvli a7, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v10, (a6)
; CHECK-NEXT: vle32.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -14586,23 +14514,22 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v11, (t0)
; ZVBB-NEXT: vle32.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle32.v v9, (a7)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v11, v8, a1
-; ZVBB-NEXT: vsetvli a7, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v10, (a6)
; ZVBB-NEXT: vle32.v v8, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v9, a1
-; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v12, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index df7af4d8b1667..111fa368ac155 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -634,12 +634,11 @@ define <vscale x 32 x i1> @vfptosi_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va)
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16
; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v24
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vand.vi v12, v12, 1
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v16, a0
; CHECK-NEXT: ret
%evec = fptosi <vscale x 32 x bfloat> %va to <vscale x 32 x i1>
@@ -656,12 +655,11 @@ define <vscale x 32 x i1> @vfptoui_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va)
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16
; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v24
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vand.vi v12, v12, 1
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v16, a0
; CHECK-NEXT: ret
%evec = fptoui <vscale x 32 x bfloat> %va to <vscale x 32 x i1>
@@ -1654,12 +1652,11 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: srli a0, a0, 2
; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16
; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v24
-; ZVFHMIN-NEXT: add a1, a0, a0
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vand.vi v12, v12, 1
; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0
; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0
-; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1>
@@ -1684,12 +1681,11 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: srli a0, a0, 2
; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16
; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v24
-; ZVFHMIN-NEXT: add a1, a0, a0
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vand.vi v12, v12, 1
; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0
; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0
-; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 142ee5256f9e7..1868154052272 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -567,38 +567,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v8, v0
-; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: li a1, -1
+; RV32-NEXT: li a2, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmerge.vim v11, v9, 1, v0
-; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vwaddu.vv v12, v11, v11
-; RV32-NEXT: vwmaccu.vx v12, a1, v11
+; RV32-NEXT: vwmaccu.vx v12, a2, v11
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: srli a2, a2, 2
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v11, v12, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v11, v12, a2
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
-; RV32-NEXT: add a1, a3, a3
+; RV32-NEXT: slli a3, a1, 1
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v9, a3
-; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v10, v9, a2
+; RV32-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wx v13, v10, a1
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vnsrl.wi v12, v10, 0
-; RV32-NEXT: srli a2, a2, 1
+; RV32-NEXT: srli a3, a3, 1
; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
@@ -611,26 +610,24 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: li a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a4, a1, 33
-; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmerge.vim v11, v9, 1, v0
-; RV64-NEXT: srli a3, a3, 2
; RV64-NEXT: vwaddu.vv v12, v11, v11
; RV64-NEXT: vwmaccu.vx v12, a2, v11
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: srli a2, a2, 2
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v11, v12, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v11, v12, a2
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v11, 0
-; RV64-NEXT: add a1, a3, a3
+; RV64-NEXT: slli a3, a1, 33
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vx v10, v9, a3
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslideup.vx v10, v9, a2
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: srli a1, a4, 32
+; RV64-NEXT: srli a1, a3, 32
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a1, 32
@@ -638,9 +635,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: vnsrl.wx v13, v10, a1
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vnsrl.wi v12, v10, 0
-; RV64-NEXT: srli a4, a4, 33
+; RV64-NEXT: srli a3, a3, 33
; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
@@ -807,10 +804,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
; RV32-NEXT: srli a3, a3, 3
; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vx v8, v12, a3
-; RV32-NEXT: add a4, a3, a3
-; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; RV32-NEXT: vslideup.vx v12, v8, a3
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT: vwaddu.vv v16, v12, v9
; RV32-NEXT: vwmaccu.vx v16, a2, v9
; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma
@@ -831,10 +825,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
; RV64-NEXT: srli a3, a3, 3
; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vx v8, v12, a3
-; RV64-NEXT: add a4, a3, a3
-; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; RV64-NEXT: vslideup.vx v12, v8, a3
-; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; RV64-NEXT: vwaddu.vv v16, v12, v9
; RV64-NEXT: vwmaccu.vx v16, a2, v9
; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma
@@ -858,29 +849,28 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v9, v0
; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: li a2, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmerge.vim v11, v8, 1, v0
; RV32-NEXT: vmv1r.v v0, v9
; RV32-NEXT: vmerge.vim v9, v8, 1, v0
-; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vwaddu.vv v12, v9, v11
; RV32-NEXT: vwmaccu.vx v12, a2, v11
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: srli a2, a2, 2
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v9, v12, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v9, v12, a2
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v9, 0
-; RV32-NEXT: add a2, a3, a3
+; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v8, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v10, v8, a2
; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
@@ -899,26 +889,24 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
; RV64-NEXT: li a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a1, a1, 33
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmerge.vim v11, v8, 1, v0
; RV64-NEXT: vmv1r.v v0, v9
; RV64-NEXT: vmerge.vim v9, v8, 1, v0
-; RV64-NEXT: srli a3, a3, 2
; RV64-NEXT: vwaddu.vv v12, v9, v11
; RV64-NEXT: vwmaccu.vx v12, a2, v11
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: srli a2, a2, 2
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v9, v12, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v9, v12, a2
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v9, 0
-; RV64-NEXT: add a2, a3, a3
+; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vx v10, v8, a3
-; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslideup.vx v10, v8, a2
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
>From 1b4687af7a75bef33046376ab1fd042634496dd6 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 17 Jun 2025 10:35:01 -0700
Subject: [PATCH 2/2] fixup! Add comment
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b50d8fe72d5cb..b0e3f534e2aaa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7377,6 +7377,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
VT.getScalarType() == MVT::i1)
return getNode(ISD::XOR, DL, VT, N1, N2);
+ // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE &&
N2.getOpcode() == ISD::VSCALE) {
const APInt &C1 = N1->getConstantOperandAPInt(0);
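For readers skimming the patch: below is a minimal, hypothetical IR sketch (not taken from the patch or its test suite) of the kind of input where the new getNode() fold can fire. It assumes the usual (mul (vscale), C) -> (vscale * C) canonicalization has already turned both ADD operands into ISD::VSCALE nodes by the time the ADD is built:

  define i64 @vscale_sum() {
    %vs = call i64 @llvm.vscale.i64()
    %a = mul i64 %vs, 6        ; assumed to become (vscale * 6)
    %b = mul i64 %vs, 2        ; assumed to become (vscale * 2)
    %r = add i64 %a, %b        ; now built directly as (vscale * 8)
    ret i64 %r
  }
  declare i64 @llvm.vscale.i64()

In the RISC-V test diffs above, this is why the "add a1, a0, a0" AVL computations disappear and the corresponding vsetvli instructions switch to their VLMAX form: the combined count is a single vscale multiple the backend can prove equal to VLMAX for that element type and LMUL.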