[llvm] [RISCV] Set AllocationPriority in line with LMUL (PR #131176)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 08:15:55 PDT 2025
https://github.com/preames updated https://github.com/llvm/llvm-project/pull/131176
>From 39fd13ebacde660b0b1b426691038d3430b1efad Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 13 Mar 2025 10:00:11 -0700
Subject: [PATCH 1/2] [RISCV] Set AllocationPriority in line with LMUL
This mechanism causes the greedy register allocator to prefer allocating
register classes with higher priority first. This helps to ensure that
high LMUL registers obtain a register without having to go through the
eviction mechanism. In practice, it seems to cause a fair amount of code
churn, along with some minor improvements around widening and narrowing
operations.
In a few of the widening tests, we have what look like code size
regressions because we end up with two smaller register class copies
instead of one larger one after the instruction. However, in any
larger code sequence, these are likely to be folded into the producing
instructions. (But so were the wider copies after the operation.)
Two observations:
1) We're not setting the greedy-regclass-priority-trumps-globalness flag
on the register class, so this doesn't help long mask ranges. I
thought about doing that, but the benefit is non-obvious, so I
decided it was worth a separate change at minimum.
2) We could arguably set the priority higher for the register classes
that exclude v0. I tried that, and it caused a whole bunch of
further churn. I may return to it in a separate patch.
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 6 +
.../redundant-copy-from-tail-duplicate.ll | 6 +-
.../CodeGen/RISCV/rvv/active_lane_mask.ll | 12 +-
.../RISCV/rvv/combine-store-extract-crash.ll | 40 +-
.../RISCV/rvv/common-shuffle-patterns.ll | 7 +-
llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 50 +-
llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 136 +-
llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 92 +-
llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 206 +--
llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 130 +-
llvm/test/CodeGen/RISCV/rvv/expandload.ll | 1282 ++++++++-------
.../CodeGen/RISCV/rvv/fixed-vectors-abs.ll | 18 +-
.../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 188 +--
.../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 232 +--
.../RISCV/rvv/fixed-vectors-fp-conv.ll | 12 +-
.../RISCV/rvv/fixed-vectors-fp-interleave.ll | 66 +-
.../RISCV/rvv/fixed-vectors-fp-setcc.ll | 216 +--
.../RISCV/rvv/fixed-vectors-fp-shuffles.ll | 6 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 1456 ++++++++---------
.../RISCV/rvv/fixed-vectors-fp2i-sat.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 12 +-
.../RISCV/rvv/fixed-vectors-fpext-vp.ll | 15 +-
.../RISCV/rvv/fixed-vectors-fptosi-vp.ll | 8 +-
.../RISCV/rvv/fixed-vectors-fptoui-vp.ll | 8 +-
.../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 12 +-
.../rvv/fixed-vectors-insert-subvector.ll | 24 +-
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 66 +-
.../RISCV/rvv/fixed-vectors-int-exttrunc.ll | 12 +-
.../RISCV/rvv/fixed-vectors-int-interleave.ll | 81 +-
.../RISCV/rvv/fixed-vectors-int-shuffles.ll | 46 +-
.../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 80 +-
.../rvv/fixed-vectors-interleaved-access.ll | 483 +++---
.../rvv/fixed-vectors-inttoptr-ptrtoint.ll | 4 +-
.../RISCV/rvv/fixed-vectors-llrint-vp.ll | 32 +-
.../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 44 +-
.../RISCV/rvv/fixed-vectors-lrint-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 76 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 8 +-
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 8 +-
.../RISCV/rvv/fixed-vectors-reduction-fp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-reduction-int.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-sad.ll | 28 +-
.../RISCV/rvv/fixed-vectors-scalarized.ll | 28 +-
.../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll | 532 +++---
.../RISCV/rvv/fixed-vectors-sext-vp.ll | 35 +-
.../fixed-vectors-shuffle-changes-length.ll | 50 +-
.../rvv/fixed-vectors-shuffle-exact-vlen.ll | 38 +-
.../rvv/fixed-vectors-shuffle-reverse.ll | 276 ++--
.../RISCV/rvv/fixed-vectors-shuffle-rotate.ll | 42 +-
.../rvv/fixed-vectors-shufflevector-vnsrl.ll | 6 +-
.../RISCV/rvv/fixed-vectors-sitofp-vp.ll | 8 +-
.../RISCV/rvv/fixed-vectors-uitofp-vp.ll | 8 +-
.../RISCV/rvv/fixed-vectors-unaligned.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vfadd-vp.ll | 24 +-
.../RISCV/rvv/fixed-vectors-vfdiv-vp.ll | 24 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll | 24 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll | 24 +-
.../RISCV/rvv/fixed-vectors-vfmul-vp.ll | 24 +-
...fixed-vectors-vfpext-constrained-sdnode.ll | 16 +-
...fixed-vectors-vfptoi-constrained-sdnode.ll | 32 +-
.../RISCV/rvv/fixed-vectors-vfsub-vp.ll | 24 +-
.../RISCV/rvv/fixed-vectors-vfwmacc.ll | 36 +-
...fixed-vectors-vitofp-constrained-sdnode.ll | 50 +-
.../RISCV/rvv/fixed-vectors-vpgather.ll | 48 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vrol.ll | 12 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vror.ll | 76 +-
.../RISCV/rvv/fixed-vectors-vwadd-mask.ll | 31 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 9 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll | 288 ++--
.../RISCV/rvv/fixed-vectors-vwsub-mask.ll | 24 +-
.../RISCV/rvv/fixed-vectors-zext-vp.ll | 35 +-
.../CodeGen/RISCV/rvv/float-round-conv.ll | 56 +-
llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll | 12 +-
.../test/CodeGen/RISCV/rvv/half-round-conv.ll | 28 +-
.../CodeGen/RISCV/rvv/interleave-crash.ll | 40 +-
.../RISCV/rvv/intrinsic-vector-match.ll | 180 +-
llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 44 +-
.../RISCV/rvv/named-vector-shuffle-reverse.ll | 280 ++--
.../CodeGen/RISCV/rvv/narrow-shift-extend.ll | 6 +-
llvm/test/CodeGen/RISCV/rvv/pr61561.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/pr95865.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 130 +-
.../CodeGen/RISCV/rvv/sink-splat-operands.ll | 12 +-
.../CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll | 16 +-
.../RISCV/rvv/vector-deinterleave-fixed.ll | 72 +-
.../RISCV/rvv/vector-deinterleave-load.ll | 14 +-
.../CodeGen/RISCV/rvv/vector-deinterleave.ll | 116 +-
.../RISCV/rvv/vector-interleave-fixed.ll | 108 +-
.../RISCV/rvv/vector-interleave-store.ll | 28 +-
.../CodeGen/RISCV/rvv/vector-interleave.ll | 770 ++++-----
llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll | 144 +-
.../RISCV/rvv/vfadd-constrained-sdnode.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 72 +-
.../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 12 +-
.../RISCV/rvv/vfdiv-constrained-sdnode.ll | 48 +-
llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 72 +-
llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 1196 ++++++--------
.../RISCV/rvv/vfmsub-constrained-sdnode.ll | 82 +-
.../RISCV/rvv/vfmul-constrained-sdnode.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 36 +-
.../RISCV/rvv/vfnmadd-constrained-sdnode.ll | 70 +-
.../RISCV/rvv/vfnmsub-constrained-sdnode.ll | 34 +-
.../RISCV/rvv/vfpext-constrained-sdnode.ll | 28 +-
llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll | 12 +-
.../RISCV/rvv/vfptoi-constrained-sdnode.ll | 48 +-
llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll | 48 +-
llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 19 +-
llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 19 +-
llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 34 +-
.../RISCV/rvv/vfsub-constrained-sdnode.ll | 48 +-
llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 72 +-
llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll | 39 +-
llvm/test/CodeGen/RISCV/rvv/vfwadd.ll | 102 +-
llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll | 40 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll | 24 +-
.../test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll | 48 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll | 48 +-
llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll | 78 +-
llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll | 80 +-
llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll | 39 +-
llvm/test/CodeGen/RISCV/rvv/vfwmul.ll | 102 +-
llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll | 114 +-
llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll | 114 +-
llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll | 39 +-
llvm/test/CodeGen/RISCV/rvv/vfwsub.ll | 102 +-
llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll | 40 +-
.../RISCV/rvv/vitofp-constrained-sdnode.ll | 72 +-
llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll | 112 +-
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 28 +-
llvm/test/CodeGen/RISCV/rvv/vloxei.ll | 144 +-
llvm/test/CodeGen/RISCV/rvv/vluxei.ll | 144 +-
.../CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll | 12 +-
.../RISCV/rvv/vp-vector-interleaved-access.ll | 20 +-
.../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll | 18 +-
llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll | 18 +-
.../RISCV/rvv/vscale-vw-web-simplification.ll | 64 +-
llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vsext.ll | 72 +-
llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 19 +-
llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 23 +-
llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 19 +-
.../CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll | 31 +-
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 110 +-
llvm/test/CodeGen/RISCV/rvv/vwadd.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vwaddu.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwmul.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwmulu.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll | 258 +--
llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll | 216 +--
llvm/test/CodeGen/RISCV/rvv/vwsll.ll | 117 +-
.../CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll | 102 +-
llvm/test/CodeGen/RISCV/rvv/vwsub.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vwsubu.ll | 81 +-
llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vzext.ll | 72 +-
.../CodeGen/RISCV/rvv/zvbb-demanded-bits.ll | 6 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 26 +-
179 files changed, 7579 insertions(+), 7479 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a5dfb5ba1a2fc..1e0541e667895 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -752,18 +752,24 @@ def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
def VRNoV0 : VReg<!listconcat(VM1VTs, VMaskVTs), (sub VR, V0), 1>;
+let AllocationPriority = 2 in
def VRM2 : VReg<VM2VTs, (add (sequence "V%uM2", 8, 31, 2),
(sequence "V%uM2", 6, 0, 2)), 2>;
+let AllocationPriority = 2 in
def VRM2NoV0 : VReg<VM2VTs, (sub VRM2, V0M2), 2>;
+let AllocationPriority = 4 in
def VRM4 : VReg<VM4VTs, (add V8M4, V12M4, V16M4, V20M4,
V24M4, V28M4, V4M4, V0M4), 4>;
+let AllocationPriority = 4 in
def VRM4NoV0 : VReg<VM4VTs, (sub VRM4, V0M4), 4>;
+let AllocationPriority = 8 in
def VRM8 : VReg<VM8VTs, (add V8M8, V16M8, V24M8, V0M8), 8>;
+let AllocationPriority = 8 in
def VRM8NoV0 : VReg<VM8VTs, (sub VRM8, V0M8), 8>;
def VMV0 : VReg<VMaskVTs, (add V0), 1>;
diff --git a/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll b/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll
index 5d588ad66b9ca..15b5698c22e81 100644
--- a/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll
+++ b/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll
@@ -20,10 +20,10 @@ define signext i32 @sum(ptr %a, i32 signext %n, i1 %prof.min.iters.check, <vscal
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_4: # %vector.ph
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v12, v8, v0.t
+; CHECK-NEXT: vredsum.vs v8, v8, v12, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
index 4ade6c09fe43d..ec422a8fbb928 100644
--- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
@@ -106,12 +106,12 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v0, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vsaddu.vx v8, v8, a1
+; CHECK-NEXT: vmsltu.vx v0, v8, a2
+; CHECK-NEXT: vsext.vf8 v8, v16
+; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
index 482cf83d540c4..496755738e6fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
@@ -9,21 +9,21 @@ define void @test(ptr %ref_array, ptr %sad_array) {
; RV32: # %bb.0: # %entry
; RV32-NEXT: th.lwd a2, a3, (a0), 0, 3
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT: vle8.v v8, (a2)
+; RV32-NEXT: vle8.v v12, (a2)
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vzext.vf4 v12, v8
-; RV32-NEXT: vmv.s.x v8, zero
-; RV32-NEXT: vredsum.vs v9, v12, v8
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vzext.vf4 v8, v12
+; RV32-NEXT: vmv.s.x v12, zero
+; RV32-NEXT: vredsum.vs v8, v8, v12
+; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: th.swia a0, (a1), 4, 0
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT: vle8.v v9, (a3)
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vle8.v v13, (a3)
+; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vi v9, v10, 4
+; RV32-NEXT: vslideup.vi v13, v8, 4
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vzext.vf4 v12, v9
-; RV32-NEXT: vredsum.vs v8, v12, v8
+; RV32-NEXT: vzext.vf4 v8, v13
+; RV32-NEXT: vredsum.vs v8, v8, v12
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: ret
@@ -32,21 +32,21 @@ define void @test(ptr %ref_array, ptr %sad_array) {
; RV64: # %bb.0: # %entry
; RV64-NEXT: th.ldd a2, a3, (a0), 0, 4
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vle8.v v8, (a2)
+; RV64-NEXT: vle8.v v12, (a2)
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vzext.vf4 v12, v8
-; RV64-NEXT: vmv.s.x v8, zero
-; RV64-NEXT: vredsum.vs v9, v12, v8
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vzext.vf4 v8, v12
+; RV64-NEXT: vmv.s.x v12, zero
+; RV64-NEXT: vredsum.vs v8, v8, v12
+; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: th.swia a0, (a1), 4, 0
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vle8.v v9, (a3)
-; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vle8.v v13, (a3)
+; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vi v9, v10, 4
+; RV64-NEXT: vslideup.vi v13, v8, 4
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vzext.vf4 v12, v9
-; RV64-NEXT: vredsum.vs v8, v12, v8
+; RV64-NEXT: vzext.vf4 v8, v13
+; RV64-NEXT: vredsum.vs v8, v8, v12
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a1)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
index 1845c0e4bd3b6..7649d9ad6059f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
@@ -8,10 +8,11 @@ define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
; CHECK-LABEL: interleave:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
entry:
%v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
index 61fb457a7eb65..69822e9d9d2e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -200,12 +200,12 @@ define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv1r.v v7, v8
; RV64-NEXT: li a2, 128
-; RV64-NEXT: vslidedown.vi v9, v0, 1
+; RV64-NEXT: vslidedown.vi v8, v0, 1
; RV64-NEXT: vmv.x.s a3, v0
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vle8.v v24, (a1)
; RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v9
+; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vcompress.vm v8, v16, v0
; RV64-NEXT: vcpop.m a4, v0
@@ -227,14 +227,14 @@ define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vmv1r.v v7, v8
; RV32-NEXT: li a2, 128
-; RV32-NEXT: vslidedown.vi v9, v0, 1
+; RV32-NEXT: vslidedown.vi v8, v0, 1
; RV32-NEXT: li a3, 32
; RV32-NEXT: vmv.x.s a4, v0
; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: vle8.v v24, (a1)
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v6, v9, a3
-; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vsrl.vx v6, v8, a3
+; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: vsrl.vx v5, v0, a3
; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: vcompress.vm v8, v16, v0
@@ -438,16 +438,16 @@ define void @test_compresstore_v128i16(ptr %p, <128 x i1> %mask, <128 x i16> %da
; RV64-NEXT: vcompress.vm v24, v8, v0
; RV64-NEXT: vcpop.m a2, v0
; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 8
+; RV64-NEXT: vslidedown.vi v7, v0, 8
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vcompress.vm v0, v16, v8
-; RV64-NEXT: vcpop.m a1, v8
+; RV64-NEXT: vcompress.vm v8, v16, v7
+; RV64-NEXT: vcpop.m a1, v7
; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; RV64-NEXT: vse16.v v24, (a0)
; RV64-NEXT: slli a2, a2, 1
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vse16.v v0, (a0)
+; RV64-NEXT: vse16.v v8, (a0)
; RV64-NEXT: ret
;
; RV32-LABEL: test_compresstore_v128i16:
@@ -635,16 +635,16 @@ define void @test_compresstore_v64i32(ptr %p, <64 x i1> %mask, <64 x i32> %data)
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: vse32.v v24, (a0)
; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 4
+; RV64-NEXT: vslidedown.vi v24, v0, 4
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT: vmv.x.s a1, v0
-; RV64-NEXT: vcompress.vm v24, v16, v8
-; RV64-NEXT: vcpop.m a2, v8
+; RV64-NEXT: vcompress.vm v8, v16, v24
+; RV64-NEXT: vcpop.m a2, v24
; RV64-NEXT: cpopw a1, a1
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV64-NEXT: vse32.v v24, (a0)
+; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
;
; RV32-LABEL: test_compresstore_v64i32:
@@ -654,16 +654,16 @@ define void @test_compresstore_v64i32(ptr %p, <64 x i1> %mask, <64 x i32> %data)
; RV32-NEXT: vcompress.vm v24, v8, v0
; RV32-NEXT: vcpop.m a2, v0
; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v0, 4
+; RV32-NEXT: vslidedown.vi v7, v0, 4
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vcompress.vm v0, v16, v8
-; RV32-NEXT: vcpop.m a1, v8
+; RV32-NEXT: vcompress.vm v8, v16, v7
+; RV32-NEXT: vcpop.m a1, v7
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vse32.v v24, (a0)
; RV32-NEXT: slli a2, a2, 2
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vse32.v v0, (a0)
+; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
entry:
tail call void @llvm.masked.compressstore.v64i32(<64 x i32> %data, ptr align 4 %p, <64 x i1> %mask)
@@ -796,18 +796,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data)
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vse64.v v24, (a0)
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v0, 2
+; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vmv.x.s a1, v0
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vcompress.vm v24, v16, v8
+; RV64-NEXT: vcompress.vm v8, v16, v24
; RV64-NEXT: zext.h a1, a1
; RV64-NEXT: cpopw a1, a1
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vcpop.m a1, v8
+; RV64-NEXT: vcpop.m a1, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vse64.v v24, (a0)
+; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
;
; RV32-LABEL: test_compresstore_v32i64:
@@ -818,18 +818,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data)
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vse64.v v24, (a0)
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v0, 2
+; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vmv.x.s a1, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vcompress.vm v24, v16, v8
+; RV32-NEXT: vcompress.vm v8, v16, v24
; RV32-NEXT: zext.h a1, a1
; RV32-NEXT: cpop a1, a1
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vcpop.m a1, v8
+; RV32-NEXT: vcpop.m a1, v24
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vse64.v v24, (a0)
+; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
entry:
tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask)
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 208735b18cbab..97e1a7f41b92f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -162,12 +162,12 @@ define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-F-LABEL: ctlz_nxv4i8:
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v9, v8
+; CHECK-F-NEXT: vzext.vf2 v10, v8
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT: vnsrl.wi v10, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v10, 0
; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 8
; CHECK-F-NEXT: vminu.vx v8, v8, a0
@@ -176,12 +176,12 @@ define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-D-LABEL: ctlz_nxv4i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v9, v8
+; CHECK-D-NEXT: vzext.vf2 v10, v8
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 0
; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 8
; CHECK-D-NEXT: vminu.vx v8, v8, a0
@@ -225,13 +225,13 @@ define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-F-LABEL: ctlz_nxv8i8:
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v10, v8
+; CHECK-F-NEXT: vzext.vf2 v12, v8
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT: vnsrl.wi v12, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v10, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v10, a0
+; CHECK-F-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 8
; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
@@ -239,13 +239,13 @@ define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-D-LABEL: ctlz_nxv8i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v10, v8
+; CHECK-D-NEXT: vzext.vf2 v12, v8
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 8
; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -288,13 +288,13 @@ define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-F-LABEL: ctlz_nxv16i8:
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v12, v8
+; CHECK-F-NEXT: vzext.vf2 v16, v8
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-F-NEXT: vnsrl.wi v16, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v12, v8, 0
-; CHECK-F-NEXT: vrsub.vx v8, v12, a0
+; CHECK-F-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: li a0, 8
; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: ret
@@ -302,13 +302,13 @@ define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-D-LABEL: ctlz_nxv16i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v12, v8
+; CHECK-D-NEXT: vzext.vf2 v16, v8
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
-; CHECK-D-NEXT: vrsub.vx v8, v12, a0
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 8
; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -1375,12 +1375,12 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
; CHECK-F-NEXT: fsrmi a1, 1
; CHECK-F-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT: vmv.v.x v8, a0
-; CHECK-F-NEXT: vsrl.vi v9, v10, 23
-; CHECK-F-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-F-NEXT: vmv.v.x v11, a0
+; CHECK-F-NEXT: vsrl.vi v10, v10, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v11, v10
; CHECK-F-NEXT: li a0, 64
; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v10, a0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: fsrm a1
; CHECK-F-NEXT: ret
;
@@ -1515,12 +1515,12 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
; CHECK-F-NEXT: fsrmi a1, 1
; CHECK-F-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT: vmv.v.x v8, a0
-; CHECK-F-NEXT: vsrl.vi v10, v12, 23
-; CHECK-F-NEXT: vwsubu.vv v12, v8, v10
+; CHECK-F-NEXT: vmv.v.x v14, a0
+; CHECK-F-NEXT: vsrl.vi v12, v12, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v14, v12
; CHECK-F-NEXT: li a0, 64
; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v12, a0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: fsrm a1
; CHECK-F-NEXT: ret
;
@@ -1655,12 +1655,12 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
; CHECK-F-NEXT: fsrmi a1, 1
; CHECK-F-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT: vmv.v.x v8, a0
-; CHECK-F-NEXT: vsrl.vi v12, v16, 23
-; CHECK-F-NEXT: vwsubu.vv v16, v8, v12
+; CHECK-F-NEXT: vmv.v.x v20, a0
+; CHECK-F-NEXT: vsrl.vi v16, v16, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v20, v16
; CHECK-F-NEXT: li a0, 64
; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v16, a0
+; CHECK-F-NEXT: vminu.vx v8, v8, a0
; CHECK-F-NEXT: fsrm a1
; CHECK-F-NEXT: ret
;
@@ -1832,11 +1832,11 @@ define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-F-LABEL: ctlz_zero_undef_nxv4i8:
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v9, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-F-NEXT: vzext.vf2 v10, v8
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT: vnsrl.wi v10, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v10, 0
; CHECK-F-NEXT: li a0, 134
; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: ret
@@ -1844,11 +1844,11 @@ define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v9, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: vzext.vf2 v10, v8
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 0
; CHECK-D-NEXT: li a0, 134
; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -1890,25 +1890,25 @@ define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-F-LABEL: ctlz_zero_undef_nxv8i8:
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v10, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-F-NEXT: vzext.vf2 v12, v8
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT: vnsrl.wi v12, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v12, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vrsub.vx v8, v10, a0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v10, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: vzext.vf2 v12, v8
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv8i8:
@@ -1948,25 +1948,25 @@ define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-F-LABEL: ctlz_zero_undef_nxv16i8:
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v12, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-F-NEXT: vzext.vf2 v16, v8
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-F-NEXT: vnsrl.wi v16, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v16, 0
; CHECK-F-NEXT: li a0, 134
-; CHECK-F-NEXT: vrsub.vx v8, v12, a0
+; CHECK-F-NEXT: vrsub.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v12, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: vzext.vf2 v16, v8
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 0
; CHECK-D-NEXT: li a0, 134
-; CHECK-D-NEXT: vrsub.vx v8, v12, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index ceca813782461..570ff34b0f23a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -120,11 +120,11 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1
; CHECK-LABEL: vp_ctlz_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
@@ -147,12 +147,12 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zer
; CHECK-LABEL: vp_ctlz_nxv4i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: vnsrl.wi v10, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
; CHECK-NEXT: vminu.vx v8, v8, a0
@@ -173,11 +173,11 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1
; CHECK-LABEL: vp_ctlz_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
@@ -200,13 +200,13 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zer
; CHECK-LABEL: vp_ctlz_nxv8i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v12, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vnsrl.wi v12, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
-; CHECK-NEXT: vrsub.vx v8, v10, a0
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
@@ -226,11 +226,11 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
; CHECK-LABEL: vp_ctlz_nxv16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v16, v8, v0.t
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
@@ -253,13 +253,13 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32
; CHECK-LABEL: vp_ctlz_nxv16i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vzext.vf2 v16, v8
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: vnsrl.wi v16, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
-; CHECK-NEXT: vrsub.vx v8, v12, a0
+; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: li a0, 8
; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: ret
@@ -1444,11 +1444,11 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
@@ -1469,11 +1469,11 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: vnsrl.wi v10, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: li a0, 134
; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: ret
@@ -1492,11 +1492,11 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
@@ -1517,13 +1517,13 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vnsrl.wi v12, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vrsub.vx v8, v10, a0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked:
@@ -1540,11 +1540,11 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v16, v8, v0.t
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
@@ -1565,13 +1565,13 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8
; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vzext.vf2 v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: vnsrl.wi v16, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v16, 0
; CHECK-NEXT: li a0, 134
-; CHECK-NEXT: vrsub.vx v8, v12, a0
+; CHECK-NEXT: vrsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index bd7a20f9ef590..09aa487cb085f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -166,9 +166,9 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-F-NEXT: li a0, 127
; CHECK-F-NEXT: vand.vv v9, v8, v9
; CHECK-F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v10, v9
-; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-F-NEXT: vnsrl.wi v9, v12, 23
+; CHECK-F-NEXT: vzext.vf2 v12, v9
+; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v12
+; CHECK-F-NEXT: vnsrl.wi v9, v10, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-F-NEXT: vnsrl.wi v9, v9, 0
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
@@ -183,9 +183,9 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v10, v9
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-D-NEXT: vnsrl.wi v9, v12, 23
+; CHECK-D-NEXT: vzext.vf2 v12, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v12
+; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-D-NEXT: vnsrl.wi v9, v9, 0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
@@ -561,10 +561,10 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-F-NEXT: vrsub.vi v9, v8, 0
; CHECK-F-NEXT: li a0, 127
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v9
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: vand.vv v10, v8, v9
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT: vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT: vsub.vx v8, v10, a0
; CHECK-F-NEXT: li a0, 16
; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-F-NEXT: ret
@@ -575,10 +575,10 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v9
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vand.vv v10, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT: vsub.vx v8, v10, a0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-D-NEXT: ret
@@ -627,10 +627,10 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-F-NEXT: vrsub.vi v10, v8, 0
; CHECK-F-NEXT: li a0, 127
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v10
-; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: vand.vv v12, v8, v10
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT: vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT: vsub.vx v8, v12, a0
; CHECK-F-NEXT: li a0, 16
; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-F-NEXT: ret
@@ -641,10 +641,10 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v10
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vand.vv v12, v8, v10
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT: vsub.vx v8, v12, a0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-D-NEXT: ret
@@ -693,10 +693,10 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-F-NEXT: vrsub.vi v12, v8, 0
; CHECK-F-NEXT: li a0, 127
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v12
-; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: vand.vv v16, v8, v12
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-F-NEXT: vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT: vsub.vx v8, v16, a0
; CHECK-F-NEXT: li a0, 16
; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-F-NEXT: ret
@@ -707,10 +707,10 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-D-NEXT: vrsub.vi v12, v8, 0
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v12
-; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vand.vv v16, v8, v12
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT: vsub.vx v8, v16, a0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-D-NEXT: ret
@@ -1327,11 +1327,11 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-F-NEXT: vfncvt.f.xu.w v8, v10
-; CHECK-F-NEXT: vsrl.vi v8, v8, 23
-; CHECK-F-NEXT: vwsubu.vx v10, v8, a1
+; CHECK-F-NEXT: vsrl.vi v10, v8, 23
+; CHECK-F-NEXT: vwsubu.vx v8, v10, a1
; CHECK-F-NEXT: li a1, 64
; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0
+; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1451,11 +1451,11 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-F-NEXT: vfncvt.f.xu.w v8, v12
-; CHECK-F-NEXT: vsrl.vi v8, v8, 23
-; CHECK-F-NEXT: vwsubu.vx v12, v8, a1
+; CHECK-F-NEXT: vsrl.vi v12, v8, 23
+; CHECK-F-NEXT: vwsubu.vx v8, v12, a1
; CHECK-F-NEXT: li a1, 64
; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0
+; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1575,11 +1575,11 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
; CHECK-F-NEXT: vmseq.vi v0, v8, 0
; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-F-NEXT: vfncvt.f.xu.w v8, v16
-; CHECK-F-NEXT: vsrl.vi v8, v8, 23
-; CHECK-F-NEXT: vwsubu.vx v16, v8, a1
+; CHECK-F-NEXT: vsrl.vi v16, v8, 23
+; CHECK-F-NEXT: vwsubu.vx v8, v16, a1
; CHECK-F-NEXT: li a1, 64
; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0
+; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1757,11 +1757,11 @@ define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-F-NEXT: vrsub.vi v9, v8, 0
; CHECK-F-NEXT: vand.vv v8, v8, v9
; CHECK-F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v9, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-F-NEXT: vzext.vf2 v10, v8
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT: vnsrl.wi v10, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v10, 0
; CHECK-F-NEXT: li a0, 127
; CHECK-F-NEXT: vsub.vx v8, v8, a0
; CHECK-F-NEXT: ret
@@ -1772,11 +1772,11 @@ define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v8, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v9, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: vzext.vf2 v10, v8
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v10, 0
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -1817,13 +1817,13 @@ define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-F-NEXT: vrsub.vi v9, v8, 0
; CHECK-F-NEXT: vand.vv v8, v8, v9
; CHECK-F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v10, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-F-NEXT: vzext.vf2 v12, v8
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT: vnsrl.wi v12, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v12, 0
; CHECK-F-NEXT: li a0, 127
-; CHECK-F-NEXT: vsub.vx v8, v10, a0
+; CHECK-F-NEXT: vsub.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv8i8:
@@ -1832,13 +1832,13 @@ define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v8, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v10, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: vzext.vf2 v12, v8
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v12, 0
; CHECK-D-NEXT: li a0, 127
-; CHECK-D-NEXT: vsub.vx v8, v10, a0
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i8:
@@ -1877,13 +1877,13 @@ define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-F-NEXT: vrsub.vi v10, v8, 0
; CHECK-F-NEXT: vand.vv v8, v8, v10
; CHECK-F-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v12, v8
-; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-F-NEXT: vzext.vf2 v16, v8
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-F-NEXT: vnsrl.wi v16, v8, 23
; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-F-NEXT: vnsrl.wi v8, v16, 0
; CHECK-F-NEXT: li a0, 127
-; CHECK-F-NEXT: vsub.vx v8, v12, a0
+; CHECK-F-NEXT: vsub.vx v8, v8, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv16i8:
@@ -1892,13 +1892,13 @@ define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
; CHECK-D-NEXT: vand.vv v8, v8, v10
; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-D-NEXT: vzext.vf2 v12, v8
-; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: vzext.vf2 v16, v8
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-D-NEXT: vnsrl.wi v8, v16, 0
; CHECK-D-NEXT: li a0, 127
-; CHECK-D-NEXT: vsub.vx v8, v12, a0
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv16i8:
@@ -2120,22 +2120,22 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-F-NEXT: vrsub.vi v9, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v9
-; CHECK-F-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-F-NEXT: vand.vv v10, v8, v9
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT: vnsrl.wi v10, v8, 23
; CHECK-F-NEXT: li a0, 127
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: vsub.vx v8, v10, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv4i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v9
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT: vand.vv v10, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vnsrl.wi v10, v8, 23
; CHECK-D-NEXT: li a0, 127
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vsub.vx v8, v10, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv4i16:
@@ -2179,22 +2179,22 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-F-NEXT: vrsub.vi v10, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v10
-; CHECK-F-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-F-NEXT: vand.vv v12, v8, v10
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT: vnsrl.wi v12, v8, 23
; CHECK-F-NEXT: li a0, 127
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: vsub.vx v8, v12, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv8i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v10
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT: vand.vv v12, v8, v10
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vnsrl.wi v12, v8, 23
; CHECK-D-NEXT: li a0, 127
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vsub.vx v8, v12, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i16:
@@ -2238,22 +2238,22 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-F: # %bb.0:
; CHECK-F-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-F-NEXT: vrsub.vi v12, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v12
-; CHECK-F-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-F-NEXT: vand.vv v16, v8, v12
+; CHECK-F-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-F-NEXT: vnsrl.wi v16, v8, 23
; CHECK-F-NEXT: li a0, 127
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: vsub.vx v8, v16, a0
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv16i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-D-NEXT: vrsub.vi v12, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v12
-; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT: vand.vv v16, v8, v12
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vnsrl.wi v16, v8, 23
; CHECK-D-NEXT: li a0, 127
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vsub.vx v8, v16, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv16i16:
@@ -2412,11 +2412,11 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vand.vv v8, v8, v9
-; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-D-NEXT: vnsrl.wx v8, v10, a0
+; CHECK-D-NEXT: vand.vv v10, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT: vnsrl.wx v10, v8, a0
; CHECK-D-NEXT: li a0, 1023
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vsub.vx v8, v10, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv2i32:
@@ -2475,11 +2475,11 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vand.vv v8, v8, v10
-; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-D-NEXT: vnsrl.wx v8, v12, a0
+; CHECK-D-NEXT: vand.vv v12, v8, v10
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT: vnsrl.wx v12, v8, a0
; CHECK-D-NEXT: li a0, 1023
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vsub.vx v8, v12, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv4i32:
@@ -2538,11 +2538,11 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-D-NEXT: vrsub.vi v12, v8, 0
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vand.vv v8, v8, v12
-; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-D-NEXT: vnsrl.wx v8, v16, a0
+; CHECK-D-NEXT: vand.vv v16, v8, v12
+; CHECK-D-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-D-NEXT: vnsrl.wx v16, v8, a0
; CHECK-D-NEXT: li a0, 1023
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: vsub.vx v8, v16, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 9f113d5c428ec..71136a6526c44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -2727,10 +2727,10 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
@@ -2754,11 +2754,11 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v9, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: vnsrl.wi v10, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vsub.vx v8, v8, a0
; CHECK-NEXT: ret
@@ -2781,10 +2781,10 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
@@ -2808,13 +2808,13 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vnsrl.wi v12, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsub.vx v8, v10, a0
+; CHECK-NEXT: vsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
@@ -2835,10 +2835,10 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vzext.vf2 v16, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
@@ -2862,13 +2862,13 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8
; CHECK-NEXT: vrsub.vi v10, v8, 0
; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
-; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vzext.vf2 v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: vnsrl.wi v16, v8, 23
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v16, 0
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsub.vx v8, v12, a0
+; CHECK-NEXT: vsub.vx v8, v8, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
@@ -3101,10 +3101,10 @@ define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <v
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: vand.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t
@@ -3124,11 +3124,11 @@ define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vrsub.vi v9, v8, 0
-; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vand.vv v10, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: vnsrl.wi v10, v8, 23
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsub.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
@@ -3147,10 +3147,10 @@ define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <v
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: vand.vv v12, v8, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t
@@ -3170,11 +3170,11 @@ define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vrsub.vi v10, v8, 0
-; CHECK-NEXT: vand.vv v8, v8, v10
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vand.vv v12, v8, v10
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vnsrl.wi v12, v8, 23
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsub.vx v8, v12, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
@@ -3193,10 +3193,10 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va,
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: vand.vv v16, v8, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t
@@ -3216,11 +3216,11 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vrsub.vi v12, v8, 0
-; CHECK-NEXT: vand.vv v8, v8, v12
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vand.vv v16, v8, v12
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: vnsrl.wi v16, v8, 23
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsub.vx v8, v16, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
@@ -3362,10 +3362,10 @@ define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <v
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: li a0, 52
-; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: vand.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: li a0, 1023
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
@@ -3387,11 +3387,11 @@ define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vrsub.vi v9, v8, 0
; CHECK-NEXT: li a0, 52
-; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vnsrl.wx v8, v10, a0
+; CHECK-NEXT: vand.vv v10, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
; CHECK-NEXT: li a0, 1023
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsub.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
@@ -3410,10 +3410,10 @@ define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <v
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
; CHECK-NEXT: li a0, 52
-; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: vand.vv v12, v8, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: li a0, 1023
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
@@ -3435,11 +3435,11 @@ define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vrsub.vi v10, v8, 0
; CHECK-NEXT: li a0, 52
-; CHECK-NEXT: vand.vv v8, v8, v10
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vnsrl.wx v8, v12, a0
+; CHECK-NEXT: vand.vv v12, v8, v10
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
; CHECK-NEXT: li a0, 1023
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsub.vx v8, v12, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
@@ -3458,10 +3458,10 @@ define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <v
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t
; CHECK-NEXT: li a0, 52
-; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: vand.vv v16, v8, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: li a0, 1023
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
@@ -3483,11 +3483,11 @@ define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vrsub.vi v12, v8, 0
; CHECK-NEXT: li a0, 52
-; CHECK-NEXT: vand.vv v8, v8, v12
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vnsrl.wx v8, v16, a0
+; CHECK-NEXT: vand.vv v16, v8, v12
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: vnsrl.wx v16, v8, a0
; CHECK-NEXT: li a0, 1023
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsub.vx v8, v16, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
index 7fa5f808f71ab..25706bdec55c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
@@ -644,12 +644,12 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV32-NEXT: addi sp, sp, -16
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: li a2, 24
-; CHECK-RV32-NEXT: mul a1, a1, a2
+; CHECK-RV32-NEXT: slli a1, a1, 5
; CHECK-RV32-NEXT: sub sp, sp, a1
-; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: slli a1, a1, 4
+; CHECK-RV32-NEXT: li a2, 24
+; CHECK-RV32-NEXT: mul a1, a1, a2
; CHECK-RV32-NEXT: add a1, sp, a1
; CHECK-RV32-NEXT: addi a1, a1, 16
; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -662,34 +662,45 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-RV32-NEXT: vcpop.m a4, v0
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v6, v0, a2
+; CHECK-RV32-NEXT: vsrl.vx v16, v0, a2
; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-RV32-NEXT: vcpop.m a2, v7
; CHECK-RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma
-; CHECK-RV32-NEXT: vle16.v v16, (a0)
+; CHECK-RV32-NEXT: vle16.v v24, (a0)
+; CHECK-RV32-NEXT: csrr a5, vlenb
+; CHECK-RV32-NEXT: slli a5, a5, 4
+; CHECK-RV32-NEXT: add a5, sp, a5
+; CHECK-RV32-NEXT: addi a5, a5, 16
+; CHECK-RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a4, v6
+; CHECK-RV32-NEXT: vmv.x.s a4, v16
; CHECK-RV32-NEXT: cpop a4, a4
; CHECK-RV32-NEXT: cpop a3, a3
; CHECK-RV32-NEXT: add a3, a3, a4
; CHECK-RV32-NEXT: slli a3, a3, 1
; CHECK-RV32-NEXT: add a0, a0, a3
; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-RV32-NEXT: vle16.v v24, (a0)
+; CHECK-RV32-NEXT: vle16.v v16, (a0)
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-RV32-NEXT: viota.m v24, v0
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: viota.m v8, v7
; CHECK-RV32-NEXT: vmv1r.v v0, v7
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: li a1, 24
+; CHECK-RV32-NEXT: mul a0, a0, a1
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -703,8 +714,7 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vmv.v.v v16, v24
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: li a1, 24
-; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: slli a0, a0, 5
; CHECK-RV32-NEXT: add sp, sp, a0
; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -1806,22 +1816,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_66: # %else242
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 1
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 1
; CHECK-RV32-NEXT: bgez a3, .LBB61_68
; CHECK-RV32-NEXT: # %bb.67: # %cond.load245
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v9, a3
; CHECK-RV32-NEXT: li a3, 63
; CHECK-RV32-NEXT: li a4, 62
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv1r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv1r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_68: # %else246
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_69
; CHECK-RV32-NEXT: j .LBB61_602
; CHECK-RV32-NEXT: .LBB61_69: # %else250
@@ -1946,34 +1956,34 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_99: # %cond.load369
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 94
; CHECK-RV32-NEXT: li a4, 93
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_100: # %else370
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_102
; CHECK-RV32-NEXT: # %bb.101: # %cond.load373
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 95
; CHECK-RV32-NEXT: li a4, 94
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_102: # %else374
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a2, v16
+; CHECK-RV32-NEXT: vmv.x.s a2, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_103
; CHECK-RV32-NEXT: j .LBB61_632
; CHECK-RV32-NEXT: .LBB61_103: # %else378
@@ -2110,22 +2120,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_134: # %else498
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 2
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_136
; CHECK-RV32-NEXT: # %bb.135: # %cond.load501
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a3
; CHECK-RV32-NEXT: li a3, 127
; CHECK-RV32-NEXT: li a4, 126
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_136: # %else502
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_137
; CHECK-RV32-NEXT: j .LBB61_662
; CHECK-RV32-NEXT: .LBB61_137: # %else506
@@ -2250,34 +2260,34 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_167: # %cond.load625
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 158
; CHECK-RV32-NEXT: li a4, 157
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_168: # %else626
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_170
; CHECK-RV32-NEXT: # %bb.169: # %cond.load629
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 159
; CHECK-RV32-NEXT: li a4, 158
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_170: # %else630
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a2, v16
+; CHECK-RV32-NEXT: vmv.x.s a2, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_171
; CHECK-RV32-NEXT: j .LBB61_692
; CHECK-RV32-NEXT: .LBB61_171: # %else634
@@ -2414,22 +2424,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_202: # %else754
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 3
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 3
; CHECK-RV32-NEXT: bgez a3, .LBB61_204
; CHECK-RV32-NEXT: # %bb.203: # %cond.load757
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a3
; CHECK-RV32-NEXT: li a3, 191
; CHECK-RV32-NEXT: li a4, 190
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_204: # %else758
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_205
; CHECK-RV32-NEXT: j .LBB61_722
; CHECK-RV32-NEXT: .LBB61_205: # %else762
@@ -2554,34 +2564,34 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_235: # %cond.load881
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 222
; CHECK-RV32-NEXT: li a4, 221
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_236: # %else882
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_238
; CHECK-RV32-NEXT: # %bb.237: # %cond.load885
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 223
; CHECK-RV32-NEXT: li a4, 222
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_238: # %else886
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a2, v16
+; CHECK-RV32-NEXT: vmv.x.s a2, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_239
; CHECK-RV32-NEXT: j .LBB61_752
; CHECK-RV32-NEXT: .LBB61_239: # %else890
@@ -2718,22 +2728,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_270: # %else1010
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 4
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 4
; CHECK-RV32-NEXT: bgez a3, .LBB61_272
; CHECK-RV32-NEXT: # %bb.271: # %cond.load1013
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a3
; CHECK-RV32-NEXT: li a3, 255
; CHECK-RV32-NEXT: li a4, 254
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: .LBB61_272: # %else1014
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_273
; CHECK-RV32-NEXT: j .LBB61_782
; CHECK-RV32-NEXT: .LBB61_273: # %else1018
@@ -2859,28 +2869,28 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 286
; CHECK-RV32-NEXT: li a4, 285
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_304: # %else1138
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_306
; CHECK-RV32-NEXT: # %bb.305: # %cond.load1141
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 287
; CHECK-RV32-NEXT: li a4, 286
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_306: # %else1142
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a2, v16
+; CHECK-RV32-NEXT: vmv.x.s a2, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_307
; CHECK-RV32-NEXT: j .LBB61_812
; CHECK-RV32-NEXT: .LBB61_307: # %else1146
@@ -3015,19 +3025,19 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_338: # %else1266
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 5
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 5
; CHECK-RV32-NEXT: bgez a3, .LBB61_340
; CHECK-RV32-NEXT: # %bb.339: # %cond.load1269
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a3
+; CHECK-RV32-NEXT: vmv.s.x v16, a3
; CHECK-RV32-NEXT: li a3, 319
; CHECK-RV32-NEXT: li a4, 318
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_340: # %else1270
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_341
; CHECK-RV32-NEXT: j .LBB61_842
; CHECK-RV32-NEXT: .LBB61_341: # %else1274
@@ -3153,28 +3163,28 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 350
; CHECK-RV32-NEXT: li a4, 349
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_372: # %else1394
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_374
; CHECK-RV32-NEXT: # %bb.373: # %cond.load1397
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 351
; CHECK-RV32-NEXT: li a4, 350
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_374: # %else1398
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a2, v16
+; CHECK-RV32-NEXT: vmv.x.s a2, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_375
; CHECK-RV32-NEXT: j .LBB61_872
; CHECK-RV32-NEXT: .LBB61_375: # %else1402
@@ -3309,19 +3319,19 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_406: # %else1522
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 6
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 6
; CHECK-RV32-NEXT: bgez a3, .LBB61_408
; CHECK-RV32-NEXT: # %bb.407: # %cond.load1525
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a3
+; CHECK-RV32-NEXT: vmv.s.x v16, a3
; CHECK-RV32-NEXT: li a3, 383
; CHECK-RV32-NEXT: li a4, 382
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_408: # %else1526
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_409
; CHECK-RV32-NEXT: j .LBB61_902
; CHECK-RV32-NEXT: .LBB61_409: # %else1530
@@ -3447,28 +3457,28 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 414
; CHECK-RV32-NEXT: li a4, 413
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_440: # %else1650
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_442
; CHECK-RV32-NEXT: # %bb.441: # %cond.load1653
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 415
; CHECK-RV32-NEXT: li a4, 414
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_442: # %else1654
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a2, v16
+; CHECK-RV32-NEXT: vmv.x.s a2, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_443
; CHECK-RV32-NEXT: j .LBB61_932
; CHECK-RV32-NEXT: .LBB61_443: # %else1658
@@ -3603,19 +3613,19 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_474: # %else1778
; CHECK-RV32-NEXT: slli a3, a2, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 7
+; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 7
; CHECK-RV32-NEXT: bgez a3, .LBB61_476
; CHECK-RV32-NEXT: # %bb.475: # %cond.load1781
; CHECK-RV32-NEXT: lbu a3, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a3
+; CHECK-RV32-NEXT: vmv.s.x v16, a3
; CHECK-RV32-NEXT: li a3, 447
; CHECK-RV32-NEXT: li a4, 446
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_476: # %else1782
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a3, v16
+; CHECK-RV32-NEXT: vmv.x.s a3, v24
; CHECK-RV32-NEXT: bgez a2, .LBB61_477
; CHECK-RV32-NEXT: j .LBB61_962
; CHECK-RV32-NEXT: .LBB61_477: # %else1786
@@ -3741,28 +3751,28 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 478
; CHECK-RV32-NEXT: li a4, 477
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_508: # %else1906
; CHECK-RV32-NEXT: slli a2, a3, 1
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vx v24, v24, a1
; CHECK-RV32-NEXT: bgez a2, .LBB61_510
; CHECK-RV32-NEXT: # %bb.509: # %cond.load1909
; CHECK-RV32-NEXT: lbu a1, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a1
+; CHECK-RV32-NEXT: vmv.s.x v16, a1
; CHECK-RV32-NEXT: li a1, 479
; CHECK-RV32-NEXT: li a2, 478
; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a2
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a2
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: .LBB61_510: # %else1910
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.x.s a1, v16
+; CHECK-RV32-NEXT: vmv.x.s a1, v24
; CHECK-RV32-NEXT: bgez a3, .LBB61_511
; CHECK-RV32-NEXT: j .LBB61_992
; CHECK-RV32-NEXT: .LBB61_511: # %else1914
@@ -4683,450 +4693,450 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_66
; CHECK-RV32-NEXT: .LBB61_602: # %cond.load249
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v9, a2
; CHECK-RV32-NEXT: li a2, 64
; CHECK-RV32-NEXT: li a4, 63
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m1, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv1r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv1r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_603
; CHECK-RV32-NEXT: j .LBB61_70
; CHECK-RV32-NEXT: .LBB61_603: # %cond.load253
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 65
; CHECK-RV32-NEXT: li a4, 64
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_604
; CHECK-RV32-NEXT: j .LBB61_71
; CHECK-RV32-NEXT: .LBB61_604: # %cond.load257
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 66
; CHECK-RV32-NEXT: li a4, 65
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_605
; CHECK-RV32-NEXT: j .LBB61_72
; CHECK-RV32-NEXT: .LBB61_605: # %cond.load261
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 67
; CHECK-RV32-NEXT: li a4, 66
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_606
; CHECK-RV32-NEXT: j .LBB61_73
; CHECK-RV32-NEXT: .LBB61_606: # %cond.load265
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 68
; CHECK-RV32-NEXT: li a4, 67
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_607
; CHECK-RV32-NEXT: j .LBB61_74
; CHECK-RV32-NEXT: .LBB61_607: # %cond.load269
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 69
; CHECK-RV32-NEXT: li a4, 68
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_608
; CHECK-RV32-NEXT: j .LBB61_75
; CHECK-RV32-NEXT: .LBB61_608: # %cond.load273
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 70
; CHECK-RV32-NEXT: li a4, 69
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_609
; CHECK-RV32-NEXT: j .LBB61_76
; CHECK-RV32-NEXT: .LBB61_609: # %cond.load277
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 71
; CHECK-RV32-NEXT: li a4, 70
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_610
; CHECK-RV32-NEXT: j .LBB61_77
; CHECK-RV32-NEXT: .LBB61_610: # %cond.load281
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 72
; CHECK-RV32-NEXT: li a4, 71
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_611
; CHECK-RV32-NEXT: j .LBB61_78
; CHECK-RV32-NEXT: .LBB61_611: # %cond.load285
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 73
; CHECK-RV32-NEXT: li a4, 72
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_612
; CHECK-RV32-NEXT: j .LBB61_79
; CHECK-RV32-NEXT: .LBB61_612: # %cond.load289
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 74
; CHECK-RV32-NEXT: li a4, 73
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_613
; CHECK-RV32-NEXT: j .LBB61_80
; CHECK-RV32-NEXT: .LBB61_613: # %cond.load293
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 75
; CHECK-RV32-NEXT: li a4, 74
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_614
; CHECK-RV32-NEXT: j .LBB61_81
; CHECK-RV32-NEXT: .LBB61_614: # %cond.load297
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 76
; CHECK-RV32-NEXT: li a4, 75
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_615
; CHECK-RV32-NEXT: j .LBB61_82
; CHECK-RV32-NEXT: .LBB61_615: # %cond.load301
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 77
; CHECK-RV32-NEXT: li a4, 76
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_616
; CHECK-RV32-NEXT: j .LBB61_83
; CHECK-RV32-NEXT: .LBB61_616: # %cond.load305
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 78
; CHECK-RV32-NEXT: li a4, 77
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_617
; CHECK-RV32-NEXT: j .LBB61_84
; CHECK-RV32-NEXT: .LBB61_617: # %cond.load309
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 79
; CHECK-RV32-NEXT: li a4, 78
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_618
; CHECK-RV32-NEXT: j .LBB61_85
; CHECK-RV32-NEXT: .LBB61_618: # %cond.load313
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 80
; CHECK-RV32-NEXT: li a4, 79
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_619
; CHECK-RV32-NEXT: j .LBB61_86
; CHECK-RV32-NEXT: .LBB61_619: # %cond.load317
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 81
; CHECK-RV32-NEXT: li a4, 80
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_620
; CHECK-RV32-NEXT: j .LBB61_87
; CHECK-RV32-NEXT: .LBB61_620: # %cond.load321
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 82
; CHECK-RV32-NEXT: li a4, 81
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_621
; CHECK-RV32-NEXT: j .LBB61_88
; CHECK-RV32-NEXT: .LBB61_621: # %cond.load325
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 83
; CHECK-RV32-NEXT: li a4, 82
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_622
; CHECK-RV32-NEXT: j .LBB61_89
; CHECK-RV32-NEXT: .LBB61_622: # %cond.load329
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 84
; CHECK-RV32-NEXT: li a4, 83
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_623
; CHECK-RV32-NEXT: j .LBB61_90
; CHECK-RV32-NEXT: .LBB61_623: # %cond.load333
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 85
; CHECK-RV32-NEXT: li a4, 84
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_624
; CHECK-RV32-NEXT: j .LBB61_91
; CHECK-RV32-NEXT: .LBB61_624: # %cond.load337
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 86
; CHECK-RV32-NEXT: li a4, 85
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_625
; CHECK-RV32-NEXT: j .LBB61_92
; CHECK-RV32-NEXT: .LBB61_625: # %cond.load341
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 87
; CHECK-RV32-NEXT: li a4, 86
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_626
; CHECK-RV32-NEXT: j .LBB61_93
; CHECK-RV32-NEXT: .LBB61_626: # %cond.load345
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 88
; CHECK-RV32-NEXT: li a4, 87
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_627
; CHECK-RV32-NEXT: j .LBB61_94
; CHECK-RV32-NEXT: .LBB61_627: # %cond.load349
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 89
; CHECK-RV32-NEXT: li a4, 88
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_628
; CHECK-RV32-NEXT: j .LBB61_95
; CHECK-RV32-NEXT: .LBB61_628: # %cond.load353
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 90
; CHECK-RV32-NEXT: li a4, 89
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_629
; CHECK-RV32-NEXT: j .LBB61_96
; CHECK-RV32-NEXT: .LBB61_629: # %cond.load357
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 91
; CHECK-RV32-NEXT: li a4, 90
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_630
; CHECK-RV32-NEXT: j .LBB61_97
; CHECK-RV32-NEXT: .LBB61_630: # %cond.load361
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 92
; CHECK-RV32-NEXT: li a4, 91
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_631
; CHECK-RV32-NEXT: j .LBB61_98
; CHECK-RV32-NEXT: .LBB61_631: # %cond.load365
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 93
; CHECK-RV32-NEXT: li a4, 92
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1027
; CHECK-RV32-NEXT: j .LBB61_99
@@ -5585,450 +5595,450 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_134
; CHECK-RV32-NEXT: .LBB61_662: # %cond.load505
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v10, a2
; CHECK-RV32-NEXT: li a2, 128
; CHECK-RV32-NEXT: li a4, 127
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv2r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
-; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: vmv2r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
+; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_663
; CHECK-RV32-NEXT: j .LBB61_138
; CHECK-RV32-NEXT: .LBB61_663: # %cond.load509
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 129
; CHECK-RV32-NEXT: li a4, 128
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_664
; CHECK-RV32-NEXT: j .LBB61_139
; CHECK-RV32-NEXT: .LBB61_664: # %cond.load513
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 130
; CHECK-RV32-NEXT: li a4, 129
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_665
; CHECK-RV32-NEXT: j .LBB61_140
; CHECK-RV32-NEXT: .LBB61_665: # %cond.load517
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 131
; CHECK-RV32-NEXT: li a4, 130
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_666
; CHECK-RV32-NEXT: j .LBB61_141
; CHECK-RV32-NEXT: .LBB61_666: # %cond.load521
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 132
; CHECK-RV32-NEXT: li a4, 131
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_667
; CHECK-RV32-NEXT: j .LBB61_142
; CHECK-RV32-NEXT: .LBB61_667: # %cond.load525
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 133
; CHECK-RV32-NEXT: li a4, 132
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_668
; CHECK-RV32-NEXT: j .LBB61_143
; CHECK-RV32-NEXT: .LBB61_668: # %cond.load529
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 134
; CHECK-RV32-NEXT: li a4, 133
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_669
; CHECK-RV32-NEXT: j .LBB61_144
; CHECK-RV32-NEXT: .LBB61_669: # %cond.load533
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 135
; CHECK-RV32-NEXT: li a4, 134
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_670
; CHECK-RV32-NEXT: j .LBB61_145
; CHECK-RV32-NEXT: .LBB61_670: # %cond.load537
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 136
; CHECK-RV32-NEXT: li a4, 135
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_671
; CHECK-RV32-NEXT: j .LBB61_146
; CHECK-RV32-NEXT: .LBB61_671: # %cond.load541
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 137
; CHECK-RV32-NEXT: li a4, 136
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_672
; CHECK-RV32-NEXT: j .LBB61_147
; CHECK-RV32-NEXT: .LBB61_672: # %cond.load545
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 138
; CHECK-RV32-NEXT: li a4, 137
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_673
; CHECK-RV32-NEXT: j .LBB61_148
; CHECK-RV32-NEXT: .LBB61_673: # %cond.load549
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 139
; CHECK-RV32-NEXT: li a4, 138
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_674
; CHECK-RV32-NEXT: j .LBB61_149
; CHECK-RV32-NEXT: .LBB61_674: # %cond.load553
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 140
; CHECK-RV32-NEXT: li a4, 139
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_675
; CHECK-RV32-NEXT: j .LBB61_150
; CHECK-RV32-NEXT: .LBB61_675: # %cond.load557
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 141
; CHECK-RV32-NEXT: li a4, 140
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_676
; CHECK-RV32-NEXT: j .LBB61_151
; CHECK-RV32-NEXT: .LBB61_676: # %cond.load561
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 142
; CHECK-RV32-NEXT: li a4, 141
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_677
; CHECK-RV32-NEXT: j .LBB61_152
; CHECK-RV32-NEXT: .LBB61_677: # %cond.load565
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 143
; CHECK-RV32-NEXT: li a4, 142
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_678
; CHECK-RV32-NEXT: j .LBB61_153
; CHECK-RV32-NEXT: .LBB61_678: # %cond.load569
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 144
; CHECK-RV32-NEXT: li a4, 143
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_679
; CHECK-RV32-NEXT: j .LBB61_154
; CHECK-RV32-NEXT: .LBB61_679: # %cond.load573
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 145
; CHECK-RV32-NEXT: li a4, 144
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_680
; CHECK-RV32-NEXT: j .LBB61_155
; CHECK-RV32-NEXT: .LBB61_680: # %cond.load577
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 146
; CHECK-RV32-NEXT: li a4, 145
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_681
; CHECK-RV32-NEXT: j .LBB61_156
; CHECK-RV32-NEXT: .LBB61_681: # %cond.load581
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 147
; CHECK-RV32-NEXT: li a4, 146
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_682
; CHECK-RV32-NEXT: j .LBB61_157
; CHECK-RV32-NEXT: .LBB61_682: # %cond.load585
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 148
; CHECK-RV32-NEXT: li a4, 147
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_683
; CHECK-RV32-NEXT: j .LBB61_158
; CHECK-RV32-NEXT: .LBB61_683: # %cond.load589
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 149
; CHECK-RV32-NEXT: li a4, 148
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_684
; CHECK-RV32-NEXT: j .LBB61_159
; CHECK-RV32-NEXT: .LBB61_684: # %cond.load593
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 150
; CHECK-RV32-NEXT: li a4, 149
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_685
; CHECK-RV32-NEXT: j .LBB61_160
; CHECK-RV32-NEXT: .LBB61_685: # %cond.load597
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 151
; CHECK-RV32-NEXT: li a4, 150
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_686
; CHECK-RV32-NEXT: j .LBB61_161
; CHECK-RV32-NEXT: .LBB61_686: # %cond.load601
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 152
; CHECK-RV32-NEXT: li a4, 151
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_687
; CHECK-RV32-NEXT: j .LBB61_162
; CHECK-RV32-NEXT: .LBB61_687: # %cond.load605
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 153
; CHECK-RV32-NEXT: li a4, 152
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_688
; CHECK-RV32-NEXT: j .LBB61_163
; CHECK-RV32-NEXT: .LBB61_688: # %cond.load609
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 154
; CHECK-RV32-NEXT: li a4, 153
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_689
; CHECK-RV32-NEXT: j .LBB61_164
; CHECK-RV32-NEXT: .LBB61_689: # %cond.load613
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 155
; CHECK-RV32-NEXT: li a4, 154
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_690
; CHECK-RV32-NEXT: j .LBB61_165
; CHECK-RV32-NEXT: .LBB61_690: # %cond.load617
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 156
; CHECK-RV32-NEXT: li a4, 155
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_691
; CHECK-RV32-NEXT: j .LBB61_166
; CHECK-RV32-NEXT: .LBB61_691: # %cond.load621
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 157
; CHECK-RV32-NEXT: li a4, 156
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1029
; CHECK-RV32-NEXT: j .LBB61_167
@@ -6487,450 +6497,450 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_202
; CHECK-RV32-NEXT: .LBB61_722: # %cond.load761
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 192
; CHECK-RV32-NEXT: li a4, 191
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_723
; CHECK-RV32-NEXT: j .LBB61_206
; CHECK-RV32-NEXT: .LBB61_723: # %cond.load765
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 193
; CHECK-RV32-NEXT: li a4, 192
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_724
; CHECK-RV32-NEXT: j .LBB61_207
; CHECK-RV32-NEXT: .LBB61_724: # %cond.load769
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 194
; CHECK-RV32-NEXT: li a4, 193
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_725
; CHECK-RV32-NEXT: j .LBB61_208
; CHECK-RV32-NEXT: .LBB61_725: # %cond.load773
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 195
; CHECK-RV32-NEXT: li a4, 194
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_726
; CHECK-RV32-NEXT: j .LBB61_209
; CHECK-RV32-NEXT: .LBB61_726: # %cond.load777
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 196
; CHECK-RV32-NEXT: li a4, 195
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_727
; CHECK-RV32-NEXT: j .LBB61_210
; CHECK-RV32-NEXT: .LBB61_727: # %cond.load781
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 197
; CHECK-RV32-NEXT: li a4, 196
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_728
; CHECK-RV32-NEXT: j .LBB61_211
; CHECK-RV32-NEXT: .LBB61_728: # %cond.load785
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 198
; CHECK-RV32-NEXT: li a4, 197
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_729
; CHECK-RV32-NEXT: j .LBB61_212
; CHECK-RV32-NEXT: .LBB61_729: # %cond.load789
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 199
; CHECK-RV32-NEXT: li a4, 198
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_730
; CHECK-RV32-NEXT: j .LBB61_213
; CHECK-RV32-NEXT: .LBB61_730: # %cond.load793
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 200
; CHECK-RV32-NEXT: li a4, 199
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_731
; CHECK-RV32-NEXT: j .LBB61_214
; CHECK-RV32-NEXT: .LBB61_731: # %cond.load797
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 201
; CHECK-RV32-NEXT: li a4, 200
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_732
; CHECK-RV32-NEXT: j .LBB61_215
; CHECK-RV32-NEXT: .LBB61_732: # %cond.load801
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 202
; CHECK-RV32-NEXT: li a4, 201
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_733
; CHECK-RV32-NEXT: j .LBB61_216
; CHECK-RV32-NEXT: .LBB61_733: # %cond.load805
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 203
; CHECK-RV32-NEXT: li a4, 202
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_734
; CHECK-RV32-NEXT: j .LBB61_217
; CHECK-RV32-NEXT: .LBB61_734: # %cond.load809
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 204
; CHECK-RV32-NEXT: li a4, 203
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_735
; CHECK-RV32-NEXT: j .LBB61_218
; CHECK-RV32-NEXT: .LBB61_735: # %cond.load813
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 205
; CHECK-RV32-NEXT: li a4, 204
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_736
; CHECK-RV32-NEXT: j .LBB61_219
; CHECK-RV32-NEXT: .LBB61_736: # %cond.load817
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 206
; CHECK-RV32-NEXT: li a4, 205
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_737
; CHECK-RV32-NEXT: j .LBB61_220
; CHECK-RV32-NEXT: .LBB61_737: # %cond.load821
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 207
; CHECK-RV32-NEXT: li a4, 206
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_738
; CHECK-RV32-NEXT: j .LBB61_221
; CHECK-RV32-NEXT: .LBB61_738: # %cond.load825
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 208
; CHECK-RV32-NEXT: li a4, 207
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_739
; CHECK-RV32-NEXT: j .LBB61_222
; CHECK-RV32-NEXT: .LBB61_739: # %cond.load829
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 209
; CHECK-RV32-NEXT: li a4, 208
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_740
; CHECK-RV32-NEXT: j .LBB61_223
; CHECK-RV32-NEXT: .LBB61_740: # %cond.load833
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 210
; CHECK-RV32-NEXT: li a4, 209
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_741
; CHECK-RV32-NEXT: j .LBB61_224
; CHECK-RV32-NEXT: .LBB61_741: # %cond.load837
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 211
; CHECK-RV32-NEXT: li a4, 210
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_742
; CHECK-RV32-NEXT: j .LBB61_225
; CHECK-RV32-NEXT: .LBB61_742: # %cond.load841
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 212
; CHECK-RV32-NEXT: li a4, 211
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_743
; CHECK-RV32-NEXT: j .LBB61_226
; CHECK-RV32-NEXT: .LBB61_743: # %cond.load845
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 213
; CHECK-RV32-NEXT: li a4, 212
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_744
; CHECK-RV32-NEXT: j .LBB61_227
; CHECK-RV32-NEXT: .LBB61_744: # %cond.load849
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 214
; CHECK-RV32-NEXT: li a4, 213
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_745
; CHECK-RV32-NEXT: j .LBB61_228
; CHECK-RV32-NEXT: .LBB61_745: # %cond.load853
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 215
; CHECK-RV32-NEXT: li a4, 214
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_746
; CHECK-RV32-NEXT: j .LBB61_229
; CHECK-RV32-NEXT: .LBB61_746: # %cond.load857
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 216
; CHECK-RV32-NEXT: li a4, 215
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_747
; CHECK-RV32-NEXT: j .LBB61_230
; CHECK-RV32-NEXT: .LBB61_747: # %cond.load861
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 217
; CHECK-RV32-NEXT: li a4, 216
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_748
; CHECK-RV32-NEXT: j .LBB61_231
; CHECK-RV32-NEXT: .LBB61_748: # %cond.load865
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 218
; CHECK-RV32-NEXT: li a4, 217
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_749
; CHECK-RV32-NEXT: j .LBB61_232
; CHECK-RV32-NEXT: .LBB61_749: # %cond.load869
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 219
; CHECK-RV32-NEXT: li a4, 218
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_750
; CHECK-RV32-NEXT: j .LBB61_233
; CHECK-RV32-NEXT: .LBB61_750: # %cond.load873
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 220
; CHECK-RV32-NEXT: li a4, 219
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_751
; CHECK-RV32-NEXT: j .LBB61_234
; CHECK-RV32-NEXT: .LBB61_751: # %cond.load877
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 221
; CHECK-RV32-NEXT: li a4, 220
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1031
; CHECK-RV32-NEXT: j .LBB61_235
@@ -7389,15 +7399,15 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_270
; CHECK-RV32-NEXT: .LBB61_782: # %cond.load1017
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv8r.v v24, v8
+; CHECK-RV32-NEXT: vmv8r.v v16, v8
; CHECK-RV32-NEXT: vmv.s.x v12, a2
; CHECK-RV32-NEXT: li a2, 256
; CHECK-RV32-NEXT: li a4, 255
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: vmv4r.v v24, v8
-; CHECK-RV32-NEXT: vmv8r.v v8, v24
+; CHECK-RV32-NEXT: vmv4r.v v16, v8
+; CHECK-RV32-NEXT: vmv8r.v v8, v16
; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_783
; CHECK-RV32-NEXT: j .LBB61_274
@@ -7405,11 +7415,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 257
; CHECK-RV32-NEXT: li a4, 256
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_784
@@ -7418,11 +7428,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 258
; CHECK-RV32-NEXT: li a4, 257
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_785
@@ -7431,11 +7441,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 259
; CHECK-RV32-NEXT: li a4, 258
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_786
@@ -7444,11 +7454,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 260
; CHECK-RV32-NEXT: li a4, 259
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_787
@@ -7457,11 +7467,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 261
; CHECK-RV32-NEXT: li a4, 260
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_788
@@ -7470,11 +7480,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 262
; CHECK-RV32-NEXT: li a4, 261
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_789
@@ -7483,11 +7493,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 263
; CHECK-RV32-NEXT: li a4, 262
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_790
@@ -7496,11 +7506,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 264
; CHECK-RV32-NEXT: li a4, 263
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_791
@@ -7509,11 +7519,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 265
; CHECK-RV32-NEXT: li a4, 264
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_792
@@ -7522,11 +7532,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 266
; CHECK-RV32-NEXT: li a4, 265
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_793
@@ -7535,11 +7545,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 267
; CHECK-RV32-NEXT: li a4, 266
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_794
@@ -7548,11 +7558,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 268
; CHECK-RV32-NEXT: li a4, 267
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_795
@@ -7561,11 +7571,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 269
; CHECK-RV32-NEXT: li a4, 268
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_796
@@ -7574,11 +7584,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 270
; CHECK-RV32-NEXT: li a4, 269
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_797
@@ -7587,11 +7597,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 271
; CHECK-RV32-NEXT: li a4, 270
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_798
@@ -7600,11 +7610,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 272
; CHECK-RV32-NEXT: li a4, 271
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_799
@@ -7613,11 +7623,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 273
; CHECK-RV32-NEXT: li a4, 272
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_800
@@ -7626,11 +7636,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 274
; CHECK-RV32-NEXT: li a4, 273
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_801
@@ -7639,11 +7649,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 275
; CHECK-RV32-NEXT: li a4, 274
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_802
@@ -7652,11 +7662,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 276
; CHECK-RV32-NEXT: li a4, 275
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_803
@@ -7665,11 +7675,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 277
; CHECK-RV32-NEXT: li a4, 276
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_804
@@ -7678,11 +7688,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 278
; CHECK-RV32-NEXT: li a4, 277
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_805
@@ -7691,11 +7701,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 279
; CHECK-RV32-NEXT: li a4, 278
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_806
@@ -7704,11 +7714,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 280
; CHECK-RV32-NEXT: li a4, 279
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_807
@@ -7717,11 +7727,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 281
; CHECK-RV32-NEXT: li a4, 280
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_808
@@ -7730,11 +7740,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 282
; CHECK-RV32-NEXT: li a4, 281
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_809
@@ -7743,11 +7753,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 283
; CHECK-RV32-NEXT: li a4, 282
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_810
@@ -7756,11 +7766,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 284
; CHECK-RV32-NEXT: li a4, 283
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_811
@@ -7769,11 +7779,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 285
; CHECK-RV32-NEXT: li a4, 284
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1033
@@ -8172,11 +8182,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_338
; CHECK-RV32-NEXT: .LBB61_842: # %cond.load1273
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 320
; CHECK-RV32-NEXT: li a4, 319
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_843
@@ -8185,11 +8195,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 321
; CHECK-RV32-NEXT: li a4, 320
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_844
@@ -8198,11 +8208,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 322
; CHECK-RV32-NEXT: li a4, 321
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_845
@@ -8211,11 +8221,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 323
; CHECK-RV32-NEXT: li a4, 322
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_846
@@ -8224,11 +8234,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 324
; CHECK-RV32-NEXT: li a4, 323
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_847
@@ -8237,11 +8247,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 325
; CHECK-RV32-NEXT: li a4, 324
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_848
@@ -8250,11 +8260,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 326
; CHECK-RV32-NEXT: li a4, 325
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_849
@@ -8263,11 +8273,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 327
; CHECK-RV32-NEXT: li a4, 326
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_850
@@ -8276,11 +8286,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 328
; CHECK-RV32-NEXT: li a4, 327
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_851
@@ -8289,11 +8299,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 329
; CHECK-RV32-NEXT: li a4, 328
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_852
@@ -8302,11 +8312,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 330
; CHECK-RV32-NEXT: li a4, 329
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_853
@@ -8315,11 +8325,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 331
; CHECK-RV32-NEXT: li a4, 330
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_854
@@ -8328,11 +8338,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 332
; CHECK-RV32-NEXT: li a4, 331
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_855
@@ -8341,11 +8351,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 333
; CHECK-RV32-NEXT: li a4, 332
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_856
@@ -8354,11 +8364,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 334
; CHECK-RV32-NEXT: li a4, 333
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_857
@@ -8367,11 +8377,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 335
; CHECK-RV32-NEXT: li a4, 334
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_858
@@ -8380,11 +8390,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 336
; CHECK-RV32-NEXT: li a4, 335
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_859
@@ -8393,11 +8403,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 337
; CHECK-RV32-NEXT: li a4, 336
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_860
@@ -8406,11 +8416,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 338
; CHECK-RV32-NEXT: li a4, 337
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_861
@@ -8419,11 +8429,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 339
; CHECK-RV32-NEXT: li a4, 338
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_862
@@ -8432,11 +8442,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 340
; CHECK-RV32-NEXT: li a4, 339
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_863
@@ -8445,11 +8455,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 341
; CHECK-RV32-NEXT: li a4, 340
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_864
@@ -8458,11 +8468,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 342
; CHECK-RV32-NEXT: li a4, 341
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_865
@@ -8471,11 +8481,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 343
; CHECK-RV32-NEXT: li a4, 342
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_866
@@ -8484,11 +8494,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 344
; CHECK-RV32-NEXT: li a4, 343
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_867
@@ -8497,11 +8507,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 345
; CHECK-RV32-NEXT: li a4, 344
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_868
@@ -8510,11 +8520,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 346
; CHECK-RV32-NEXT: li a4, 345
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_869
@@ -8523,11 +8533,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 347
; CHECK-RV32-NEXT: li a4, 346
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_870
@@ -8536,11 +8546,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 348
; CHECK-RV32-NEXT: li a4, 347
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_871
@@ -8549,11 +8559,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 349
; CHECK-RV32-NEXT: li a4, 348
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1035
@@ -8952,11 +8962,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_406
; CHECK-RV32-NEXT: .LBB61_902: # %cond.load1529
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 384
; CHECK-RV32-NEXT: li a4, 383
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_903
@@ -8965,11 +8975,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 385
; CHECK-RV32-NEXT: li a4, 384
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_904
@@ -8978,11 +8988,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 386
; CHECK-RV32-NEXT: li a4, 385
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_905
@@ -8991,11 +9001,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 387
; CHECK-RV32-NEXT: li a4, 386
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_906
@@ -9004,11 +9014,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 388
; CHECK-RV32-NEXT: li a4, 387
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_907
@@ -9017,11 +9027,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 389
; CHECK-RV32-NEXT: li a4, 388
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_908
@@ -9030,11 +9040,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 390
; CHECK-RV32-NEXT: li a4, 389
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_909
@@ -9043,11 +9053,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 391
; CHECK-RV32-NEXT: li a4, 390
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_910
@@ -9056,11 +9066,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 392
; CHECK-RV32-NEXT: li a4, 391
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_911
@@ -9069,11 +9079,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 393
; CHECK-RV32-NEXT: li a4, 392
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_912
@@ -9082,11 +9092,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 394
; CHECK-RV32-NEXT: li a4, 393
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_913
@@ -9095,11 +9105,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 395
; CHECK-RV32-NEXT: li a4, 394
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_914
@@ -9108,11 +9118,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 396
; CHECK-RV32-NEXT: li a4, 395
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_915
@@ -9121,11 +9131,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 397
; CHECK-RV32-NEXT: li a4, 396
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_916
@@ -9134,11 +9144,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 398
; CHECK-RV32-NEXT: li a4, 397
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_917
@@ -9147,11 +9157,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 399
; CHECK-RV32-NEXT: li a4, 398
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_918
@@ -9160,11 +9170,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 400
; CHECK-RV32-NEXT: li a4, 399
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_919
@@ -9173,11 +9183,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 401
; CHECK-RV32-NEXT: li a4, 400
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_920
@@ -9186,11 +9196,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 402
; CHECK-RV32-NEXT: li a4, 401
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_921
@@ -9199,11 +9209,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 403
; CHECK-RV32-NEXT: li a4, 402
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_922
@@ -9212,11 +9222,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 404
; CHECK-RV32-NEXT: li a4, 403
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_923
@@ -9225,11 +9235,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 405
; CHECK-RV32-NEXT: li a4, 404
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_924
@@ -9238,11 +9248,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 406
; CHECK-RV32-NEXT: li a4, 405
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_925
@@ -9251,11 +9261,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 407
; CHECK-RV32-NEXT: li a4, 406
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_926
@@ -9264,11 +9274,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 408
; CHECK-RV32-NEXT: li a4, 407
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_927
@@ -9277,11 +9287,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 409
; CHECK-RV32-NEXT: li a4, 408
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_928
@@ -9290,11 +9300,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 410
; CHECK-RV32-NEXT: li a4, 409
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_929
@@ -9303,11 +9313,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 411
; CHECK-RV32-NEXT: li a4, 410
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_930
@@ -9316,11 +9326,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 412
; CHECK-RV32-NEXT: li a4, 411
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_931
@@ -9329,11 +9339,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 413
; CHECK-RV32-NEXT: li a4, 412
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1037
@@ -9732,11 +9742,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: j .LBB61_474
; CHECK-RV32-NEXT: .LBB61_962: # %cond.load1785
; CHECK-RV32-NEXT: lbu a2, 0(a0)
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 448
; CHECK-RV32-NEXT: li a4, 447
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1
; CHECK-RV32-NEXT: bnez a2, .LBB61_963
@@ -9745,11 +9755,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 449
; CHECK-RV32-NEXT: li a4, 448
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 2
; CHECK-RV32-NEXT: bnez a2, .LBB61_964
@@ -9758,11 +9768,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 450
; CHECK-RV32-NEXT: li a4, 449
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 4
; CHECK-RV32-NEXT: bnez a2, .LBB61_965
@@ -9771,11 +9781,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 451
; CHECK-RV32-NEXT: li a4, 450
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 8
; CHECK-RV32-NEXT: bnez a2, .LBB61_966
@@ -9784,11 +9794,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 452
; CHECK-RV32-NEXT: li a4, 451
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 16
; CHECK-RV32-NEXT: bnez a2, .LBB61_967
@@ -9797,11 +9807,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 453
; CHECK-RV32-NEXT: li a4, 452
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 32
; CHECK-RV32-NEXT: bnez a2, .LBB61_968
@@ -9810,11 +9820,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 454
; CHECK-RV32-NEXT: li a4, 453
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 64
; CHECK-RV32-NEXT: bnez a2, .LBB61_969
@@ -9823,11 +9833,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 455
; CHECK-RV32-NEXT: li a4, 454
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 128
; CHECK-RV32-NEXT: bnez a2, .LBB61_970
@@ -9836,11 +9846,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 456
; CHECK-RV32-NEXT: li a4, 455
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 256
; CHECK-RV32-NEXT: bnez a2, .LBB61_971
@@ -9849,11 +9859,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 457
; CHECK-RV32-NEXT: li a4, 456
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 512
; CHECK-RV32-NEXT: bnez a2, .LBB61_972
@@ -9862,11 +9872,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 458
; CHECK-RV32-NEXT: li a4, 457
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: andi a2, a3, 1024
; CHECK-RV32-NEXT: bnez a2, .LBB61_973
@@ -9875,11 +9885,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 459
; CHECK-RV32-NEXT: li a4, 458
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 20
; CHECK-RV32-NEXT: bltz a2, .LBB61_974
@@ -9888,11 +9898,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 460
; CHECK-RV32-NEXT: li a4, 459
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 19
; CHECK-RV32-NEXT: bltz a2, .LBB61_975
@@ -9901,11 +9911,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 461
; CHECK-RV32-NEXT: li a4, 460
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 18
; CHECK-RV32-NEXT: bltz a2, .LBB61_976
@@ -9914,11 +9924,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 462
; CHECK-RV32-NEXT: li a4, 461
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 17
; CHECK-RV32-NEXT: bltz a2, .LBB61_977
@@ -9927,11 +9937,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 463
; CHECK-RV32-NEXT: li a4, 462
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 16
; CHECK-RV32-NEXT: bltz a2, .LBB61_978
@@ -9940,11 +9950,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 464
; CHECK-RV32-NEXT: li a4, 463
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 15
; CHECK-RV32-NEXT: bltz a2, .LBB61_979
@@ -9953,11 +9963,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 465
; CHECK-RV32-NEXT: li a4, 464
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 14
; CHECK-RV32-NEXT: bltz a2, .LBB61_980
@@ -9966,11 +9976,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 466
; CHECK-RV32-NEXT: li a4, 465
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 13
; CHECK-RV32-NEXT: bltz a2, .LBB61_981
@@ -9979,11 +9989,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 467
; CHECK-RV32-NEXT: li a4, 466
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 12
; CHECK-RV32-NEXT: bltz a2, .LBB61_982
@@ -9992,11 +10002,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 468
; CHECK-RV32-NEXT: li a4, 467
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 11
; CHECK-RV32-NEXT: bltz a2, .LBB61_983
@@ -10005,11 +10015,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 469
; CHECK-RV32-NEXT: li a4, 468
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 10
; CHECK-RV32-NEXT: bltz a2, .LBB61_984
@@ -10018,11 +10028,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 470
; CHECK-RV32-NEXT: li a4, 469
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 9
; CHECK-RV32-NEXT: bltz a2, .LBB61_985
@@ -10031,11 +10041,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 471
; CHECK-RV32-NEXT: li a4, 470
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 8
; CHECK-RV32-NEXT: bltz a2, .LBB61_986
@@ -10044,11 +10054,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 472
; CHECK-RV32-NEXT: li a4, 471
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 7
; CHECK-RV32-NEXT: bltz a2, .LBB61_987
@@ -10057,11 +10067,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 473
; CHECK-RV32-NEXT: li a4, 472
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 6
; CHECK-RV32-NEXT: bltz a2, .LBB61_988
@@ -10070,11 +10080,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 474
; CHECK-RV32-NEXT: li a4, 473
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 5
; CHECK-RV32-NEXT: bltz a2, .LBB61_989
@@ -10083,11 +10093,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 475
; CHECK-RV32-NEXT: li a4, 474
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 4
; CHECK-RV32-NEXT: bltz a2, .LBB61_990
@@ -10096,11 +10106,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 476
; CHECK-RV32-NEXT: li a4, 475
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 3
; CHECK-RV32-NEXT: bltz a2, .LBB61_991
@@ -10109,11 +10119,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v24, a2
+; CHECK-RV32-NEXT: vmv.s.x v16, a2
; CHECK-RV32-NEXT: li a2, 477
; CHECK-RV32-NEXT: li a4, 476
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4
+; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_1039
@@ -10807,22 +10817,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_63: # %else242
; CHECK-RV64-NEXT: slli a1, a2, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 1
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 1
; CHECK-RV64-NEXT: bgez a1, .LBB61_65
; CHECK-RV64-NEXT: # %bb.64: # %cond.load245
; CHECK-RV64-NEXT: lbu a1, 0(a0)
-; CHECK-RV64-NEXT: vmv8r.v v24, v8
+; CHECK-RV64-NEXT: vmv8r.v v16, v8
; CHECK-RV64-NEXT: vmv.s.x v9, a1
; CHECK-RV64-NEXT: li a1, 63
; CHECK-RV64-NEXT: li a3, 62
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: vmv1r.v v24, v8
-; CHECK-RV64-NEXT: vmv8r.v v8, v24
+; CHECK-RV64-NEXT: vmv1r.v v16, v8
+; CHECK-RV64-NEXT: vmv8r.v v8, v16
; CHECK-RV64-NEXT: .LBB61_65: # %else246
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a1, v16
+; CHECK-RV64-NEXT: vmv.x.s a1, v24
; CHECK-RV64-NEXT: bgez a2, .LBB61_66
; CHECK-RV64-NEXT: j .LBB61_588
; CHECK-RV64-NEXT: .LBB61_66: # %else250
@@ -11087,22 +11097,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_129: # %else498
; CHECK-RV64-NEXT: slli a2, a1, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 2
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_131
; CHECK-RV64-NEXT: # %bb.130: # %cond.load501
; CHECK-RV64-NEXT: lbu a2, 0(a0)
-; CHECK-RV64-NEXT: vmv8r.v v24, v8
+; CHECK-RV64-NEXT: vmv8r.v v16, v8
; CHECK-RV64-NEXT: vmv.s.x v10, a2
; CHECK-RV64-NEXT: li a2, 127
; CHECK-RV64-NEXT: li a3, 126
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: vmv2r.v v24, v8
-; CHECK-RV64-NEXT: vmv8r.v v8, v24
+; CHECK-RV64-NEXT: vmv2r.v v16, v8
+; CHECK-RV64-NEXT: vmv8r.v v8, v16
; CHECK-RV64-NEXT: .LBB61_131: # %else502
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a2, v16
+; CHECK-RV64-NEXT: vmv.x.s a2, v24
; CHECK-RV64-NEXT: bgez a1, .LBB61_132
; CHECK-RV64-NEXT: j .LBB61_650
; CHECK-RV64-NEXT: .LBB61_132: # %else506
@@ -11367,22 +11377,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_195: # %else754
; CHECK-RV64-NEXT: slli a1, a2, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 3
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 3
; CHECK-RV64-NEXT: bgez a1, .LBB61_197
; CHECK-RV64-NEXT: # %bb.196: # %cond.load757
; CHECK-RV64-NEXT: lbu a1, 0(a0)
-; CHECK-RV64-NEXT: vmv8r.v v24, v8
+; CHECK-RV64-NEXT: vmv8r.v v16, v8
; CHECK-RV64-NEXT: vmv.s.x v12, a1
; CHECK-RV64-NEXT: li a1, 191
; CHECK-RV64-NEXT: li a3, 190
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: vmv4r.v v24, v8
-; CHECK-RV64-NEXT: vmv8r.v v8, v24
+; CHECK-RV64-NEXT: vmv4r.v v16, v8
+; CHECK-RV64-NEXT: vmv8r.v v8, v16
; CHECK-RV64-NEXT: .LBB61_197: # %else758
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a1, v16
+; CHECK-RV64-NEXT: vmv.x.s a1, v24
; CHECK-RV64-NEXT: bgez a2, .LBB61_198
; CHECK-RV64-NEXT: j .LBB61_712
; CHECK-RV64-NEXT: .LBB61_198: # %else762
@@ -11647,22 +11657,22 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_261: # %else1010
; CHECK-RV64-NEXT: slli a2, a1, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 4
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 4
; CHECK-RV64-NEXT: bgez a2, .LBB61_263
; CHECK-RV64-NEXT: # %bb.262: # %cond.load1013
; CHECK-RV64-NEXT: lbu a2, 0(a0)
-; CHECK-RV64-NEXT: vmv8r.v v24, v8
+; CHECK-RV64-NEXT: vmv8r.v v16, v8
; CHECK-RV64-NEXT: vmv.s.x v12, a2
; CHECK-RV64-NEXT: li a2, 255
; CHECK-RV64-NEXT: li a3, 254
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: vmv4r.v v24, v8
-; CHECK-RV64-NEXT: vmv8r.v v8, v24
+; CHECK-RV64-NEXT: vmv4r.v v16, v8
+; CHECK-RV64-NEXT: vmv8r.v v8, v16
; CHECK-RV64-NEXT: .LBB61_263: # %else1014
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a2, v16
+; CHECK-RV64-NEXT: vmv.x.s a2, v24
; CHECK-RV64-NEXT: bgez a1, .LBB61_264
; CHECK-RV64-NEXT: j .LBB61_774
; CHECK-RV64-NEXT: .LBB61_264: # %else1018
@@ -11925,19 +11935,19 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_327: # %else1266
; CHECK-RV64-NEXT: slli a1, a2, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 5
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 5
; CHECK-RV64-NEXT: bgez a1, .LBB61_329
; CHECK-RV64-NEXT: # %bb.328: # %cond.load1269
; CHECK-RV64-NEXT: lbu a1, 0(a0)
-; CHECK-RV64-NEXT: vmv.s.x v24, a1
+; CHECK-RV64-NEXT: vmv.s.x v16, a1
; CHECK-RV64-NEXT: li a1, 319
; CHECK-RV64-NEXT: li a3, 318
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-RV64-NEXT: vslideup.vx v8, v24, a3
+; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: .LBB61_329: # %else1270
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a1, v16
+; CHECK-RV64-NEXT: vmv.x.s a1, v24
; CHECK-RV64-NEXT: bgez a2, .LBB61_330
; CHECK-RV64-NEXT: j .LBB61_836
; CHECK-RV64-NEXT: .LBB61_330: # %else1274
@@ -12200,19 +12210,19 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_393: # %else1522
; CHECK-RV64-NEXT: slli a2, a1, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 6
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 6
; CHECK-RV64-NEXT: bgez a2, .LBB61_395
; CHECK-RV64-NEXT: # %bb.394: # %cond.load1525
; CHECK-RV64-NEXT: lbu a2, 0(a0)
-; CHECK-RV64-NEXT: vmv.s.x v24, a2
+; CHECK-RV64-NEXT: vmv.s.x v16, a2
; CHECK-RV64-NEXT: li a2, 383
; CHECK-RV64-NEXT: li a3, 382
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-RV64-NEXT: vslideup.vx v8, v24, a3
+; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: .LBB61_395: # %else1526
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a2, v16
+; CHECK-RV64-NEXT: vmv.x.s a2, v24
; CHECK-RV64-NEXT: bgez a1, .LBB61_396
; CHECK-RV64-NEXT: j .LBB61_898
; CHECK-RV64-NEXT: .LBB61_396: # %else1530
@@ -12475,19 +12485,19 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_459: # %else1778
; CHECK-RV64-NEXT: slli a1, a2, 1
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 7
+; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 7
; CHECK-RV64-NEXT: bgez a1, .LBB61_461
; CHECK-RV64-NEXT: # %bb.460: # %cond.load1781
; CHECK-RV64-NEXT: lbu a1, 0(a0)
-; CHECK-RV64-NEXT: vmv.s.x v24, a1
+; CHECK-RV64-NEXT: vmv.s.x v16, a1
; CHECK-RV64-NEXT: li a1, 447
; CHECK-RV64-NEXT: li a3, 446
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-RV64-NEXT: vslideup.vx v8, v24, a3
+; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: .LBB61_461: # %else1782
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV64-NEXT: vmv.x.s a1, v16
+; CHECK-RV64-NEXT: vmv.x.s a1, v24
; CHECK-RV64-NEXT: bgez a2, .LBB61_462
; CHECK-RV64-NEXT: j .LBB61_960
; CHECK-RV64-NEXT: .LBB61_462: # %else1786
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index 3153b44386d7a..2356237d790b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -153,10 +153,10 @@ define void @abs_v4i64_of_sext_v4i8(ptr %x) {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vrsub.vi v9, v8, 0
-; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vmax.vv v10, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: vzext.vf8 v8, v10
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i8>, ptr %x
%a.ext = sext <4 x i8> %a to <4 x i64>
@@ -171,10 +171,10 @@ define void @abs_v4i64_of_sext_v4i16(ptr %x) {
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vrsub.vi v9, v8, 0
-; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vmax.vv v10, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: vzext.vf4 v8, v10
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
%a.ext = sext <4 x i16> %a to <4 x i64>
@@ -189,10 +189,10 @@ define void @abs_v4i64_of_sext_v4i32(ptr %x) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrsub.vi v9, v8, 0
-; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vmax.vv v10, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%a.ext = sext <4 x i32> %a to <4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 4bd4a9a854f36..829cc9bac7ed3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -40,12 +40,12 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
; RVF-NEXT: li a1, 134
-; RVF-NEXT: vzext.vf2 v10, v8
-; RVF-NEXT: vfwcvt.f.xu.v v12, v10
-; RVF-NEXT: vnsrl.wi v8, v12, 23
+; RVF-NEXT: vzext.vf2 v12, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT: vnsrl.wi v10, v8, 0
-; RVF-NEXT: vrsub.vx v8, v10, a1
+; RVF-NEXT: vnsrl.wi v8, v12, 0
+; RVF-NEXT: vrsub.vx v8, v8, a1
; RVF-NEXT: li a1, 8
; RVF-NEXT: vminu.vx v8, v8, a1
; RVF-NEXT: vse8.v v8, (a0)
@@ -56,12 +56,12 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
; RVD-NEXT: li a1, 134
-; RVD-NEXT: vzext.vf2 v10, v8
-; RVD-NEXT: vfwcvt.f.xu.v v12, v10
-; RVD-NEXT: vnsrl.wi v8, v12, 23
+; RVD-NEXT: vzext.vf2 v12, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT: vnsrl.wi v10, v8, 0
-; RVD-NEXT: vrsub.vx v8, v10, a1
+; RVD-NEXT: vnsrl.wi v8, v12, 0
+; RVD-NEXT: vrsub.vx v8, v8, a1
; RVD-NEXT: li a1, 8
; RVD-NEXT: vminu.vx v8, v8, a1
; RVD-NEXT: vse8.v v8, (a0)
@@ -121,11 +121,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; RVF-LABEL: ctlz_v8i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RVF-NEXT: vle16.v v8, (a0)
+; RVF-NEXT: vle16.v v10, (a0)
; RVF-NEXT: li a1, 142
-; RVF-NEXT: vfwcvt.f.xu.v v10, v8
-; RVF-NEXT: vnsrl.wi v8, v10, 23
-; RVF-NEXT: vrsub.vx v8, v8, a1
+; RVF-NEXT: vfwcvt.f.xu.v v8, v10
+; RVF-NEXT: vnsrl.wi v10, v8, 23
+; RVF-NEXT: vrsub.vx v8, v10, a1
; RVF-NEXT: li a1, 16
; RVF-NEXT: vminu.vx v8, v8, a1
; RVF-NEXT: vse16.v v8, (a0)
@@ -134,11 +134,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; RVD-LABEL: ctlz_v8i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RVD-NEXT: vle16.v v8, (a0)
+; RVD-NEXT: vle16.v v10, (a0)
; RVD-NEXT: li a1, 142
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wi v8, v10, 23
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wi v10, v8, 23
+; RVD-NEXT: vrsub.vx v8, v10, a1
; RVD-NEXT: li a1, 16
; RVD-NEXT: vminu.vx v8, v8, a1
; RVD-NEXT: vse16.v v8, (a0)
@@ -216,12 +216,12 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
; RVD-LABEL: ctlz_v4i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RVD-NEXT: vle32.v v8, (a0)
+; RVD-NEXT: vle32.v v10, (a0)
; RVD-NEXT: li a1, 52
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wx v8, v10, a1
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wx v10, v8, a1
; RVD-NEXT: li a1, 1054
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vrsub.vx v8, v10, a1
; RVD-NEXT: li a1, 32
; RVD-NEXT: vminu.vx v8, v8, a1
; RVD-NEXT: vse32.v v8, (a0)
@@ -429,12 +429,12 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
; RVF-NEXT: li a1, 134
-; RVF-NEXT: vzext.vf2 v12, v8
-; RVF-NEXT: vfwcvt.f.xu.v v16, v12
-; RVF-NEXT: vnsrl.wi v8, v16, 23
+; RVF-NEXT: vzext.vf2 v16, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v16
+; RVF-NEXT: vnsrl.wi v16, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT: vnsrl.wi v12, v8, 0
-; RVF-NEXT: vrsub.vx v8, v12, a1
+; RVF-NEXT: vnsrl.wi v8, v16, 0
+; RVF-NEXT: vrsub.vx v8, v8, a1
; RVF-NEXT: li a1, 8
; RVF-NEXT: vminu.vx v8, v8, a1
; RVF-NEXT: vse8.v v8, (a0)
@@ -446,12 +446,12 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
; RVD-NEXT: li a1, 134
-; RVD-NEXT: vzext.vf2 v12, v8
-; RVD-NEXT: vfwcvt.f.xu.v v16, v12
-; RVD-NEXT: vnsrl.wi v8, v16, 23
+; RVD-NEXT: vzext.vf2 v16, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v16
+; RVD-NEXT: vnsrl.wi v16, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT: vnsrl.wi v12, v8, 0
-; RVD-NEXT: vrsub.vx v8, v12, a1
+; RVD-NEXT: vnsrl.wi v8, v16, 0
+; RVD-NEXT: vrsub.vx v8, v8, a1
; RVD-NEXT: li a1, 8
; RVD-NEXT: vminu.vx v8, v8, a1
; RVD-NEXT: vse8.v v8, (a0)
@@ -512,11 +512,11 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
; RVF-LABEL: ctlz_v16i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RVF-NEXT: vle16.v v8, (a0)
+; RVF-NEXT: vle16.v v12, (a0)
; RVF-NEXT: li a1, 142
-; RVF-NEXT: vfwcvt.f.xu.v v12, v8
-; RVF-NEXT: vnsrl.wi v8, v12, 23
-; RVF-NEXT: vrsub.vx v8, v8, a1
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
+; RVF-NEXT: vrsub.vx v8, v12, a1
; RVF-NEXT: li a1, 16
; RVF-NEXT: vminu.vx v8, v8, a1
; RVF-NEXT: vse16.v v8, (a0)
@@ -525,11 +525,11 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
; RVD-LABEL: ctlz_v16i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RVD-NEXT: vle16.v v8, (a0)
+; RVD-NEXT: vle16.v v12, (a0)
; RVD-NEXT: li a1, 142
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wi v8, v12, 23
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
+; RVD-NEXT: vrsub.vx v8, v12, a1
; RVD-NEXT: li a1, 16
; RVD-NEXT: vminu.vx v8, v8, a1
; RVD-NEXT: vse16.v v8, (a0)
@@ -607,12 +607,12 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
; RVD-LABEL: ctlz_v8i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RVD-NEXT: vle32.v v8, (a0)
+; RVD-NEXT: vle32.v v12, (a0)
; RVD-NEXT: li a1, 52
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wx v8, v12, a1
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wx v12, v8, a1
; RVD-NEXT: li a1, 1054
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vrsub.vx v8, v12, a1
; RVD-NEXT: li a1, 32
; RVD-NEXT: vminu.vx v8, v8, a1
; RVD-NEXT: vse32.v v8, (a0)
@@ -747,11 +747,11 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vfncvt.f.xu.w v11, v8
; RVF-NEXT: fsrm a1
-; RVF-NEXT: vsrl.vi v8, v11, 23
-; RVF-NEXT: vwsubu.vv v12, v10, v8
+; RVF-NEXT: vsrl.vi v11, v11, 23
+; RVF-NEXT: vwsubu.vv v8, v10, v11
; RVF-NEXT: li a1, 64
; RVF-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RVF-NEXT: vminu.vx v8, v12, a1
+; RVF-NEXT: vminu.vx v8, v8, a1
; RVF-NEXT: vse64.v v8, (a0)
; RVF-NEXT: ret
;
@@ -817,13 +817,13 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
-; RVF-NEXT: vzext.vf2 v10, v8
-; RVF-NEXT: vfwcvt.f.xu.v v12, v10
-; RVF-NEXT: vnsrl.wi v8, v12, 23
+; RVF-NEXT: vzext.vf2 v12, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT: vnsrl.wi v10, v8, 0
+; RVF-NEXT: vnsrl.wi v8, v12, 0
; RVF-NEXT: li a1, 134
-; RVF-NEXT: vrsub.vx v8, v10, a1
+; RVF-NEXT: vrsub.vx v8, v8, a1
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
@@ -831,13 +831,13 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
-; RVD-NEXT: vzext.vf2 v10, v8
-; RVD-NEXT: vfwcvt.f.xu.v v12, v10
-; RVD-NEXT: vnsrl.wi v8, v12, 23
+; RVD-NEXT: vzext.vf2 v12, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT: vnsrl.wi v10, v8, 0
+; RVD-NEXT: vnsrl.wi v8, v12, 0
; RVD-NEXT: li a1, 134
-; RVD-NEXT: vrsub.vx v8, v10, a1
+; RVD-NEXT: vrsub.vx v8, v8, a1
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
@@ -894,22 +894,22 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVF-LABEL: ctlz_zero_undef_v8i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RVF-NEXT: vle16.v v8, (a0)
-; RVF-NEXT: vfwcvt.f.xu.v v10, v8
-; RVF-NEXT: vnsrl.wi v8, v10, 23
+; RVF-NEXT: vle16.v v10, (a0)
+; RVF-NEXT: vfwcvt.f.xu.v v8, v10
+; RVF-NEXT: vnsrl.wi v10, v8, 23
; RVF-NEXT: li a1, 142
-; RVF-NEXT: vrsub.vx v8, v8, a1
+; RVF-NEXT: vrsub.vx v8, v10, a1
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: ctlz_zero_undef_v8i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RVD-NEXT: vle16.v v8, (a0)
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wi v8, v10, 23
+; RVD-NEXT: vle16.v v10, (a0)
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wi v10, v8, 23
; RVD-NEXT: li a1, 142
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vrsub.vx v8, v10, a1
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
@@ -982,12 +982,12 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVD-LABEL: ctlz_zero_undef_v4i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RVD-NEXT: vle32.v v8, (a0)
+; RVD-NEXT: vle32.v v10, (a0)
; RVD-NEXT: li a1, 52
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wx v8, v10, a1
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wx v10, v8, a1
; RVD-NEXT: li a1, 1054
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vrsub.vx v8, v10, a1
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1185,13 +1185,13 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: li a1, 32
; RVF-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
-; RVF-NEXT: vzext.vf2 v12, v8
-; RVF-NEXT: vfwcvt.f.xu.v v16, v12
-; RVF-NEXT: vnsrl.wi v8, v16, 23
+; RVF-NEXT: vzext.vf2 v16, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v16
+; RVF-NEXT: vnsrl.wi v16, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT: vnsrl.wi v12, v8, 0
+; RVF-NEXT: vnsrl.wi v8, v16, 0
; RVF-NEXT: li a1, 134
-; RVF-NEXT: vrsub.vx v8, v12, a1
+; RVF-NEXT: vrsub.vx v8, v8, a1
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
@@ -1200,13 +1200,13 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: li a1, 32
; RVD-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
-; RVD-NEXT: vzext.vf2 v12, v8
-; RVD-NEXT: vfwcvt.f.xu.v v16, v12
-; RVD-NEXT: vnsrl.wi v8, v16, 23
+; RVD-NEXT: vzext.vf2 v16, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v16
+; RVD-NEXT: vnsrl.wi v16, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT: vnsrl.wi v12, v8, 0
+; RVD-NEXT: vnsrl.wi v8, v16, 0
; RVD-NEXT: li a1, 134
-; RVD-NEXT: vrsub.vx v8, v12, a1
+; RVD-NEXT: vrsub.vx v8, v8, a1
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1264,22 +1264,22 @@ define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVF-LABEL: ctlz_zero_undef_v16i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RVF-NEXT: vle16.v v8, (a0)
-; RVF-NEXT: vfwcvt.f.xu.v v12, v8
-; RVF-NEXT: vnsrl.wi v8, v12, 23
+; RVF-NEXT: vle16.v v12, (a0)
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
; RVF-NEXT: li a1, 142
-; RVF-NEXT: vrsub.vx v8, v8, a1
+; RVF-NEXT: vrsub.vx v8, v12, a1
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: ctlz_zero_undef_v16i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RVD-NEXT: vle16.v v8, (a0)
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wi v8, v12, 23
+; RVD-NEXT: vle16.v v12, (a0)
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
; RVD-NEXT: li a1, 142
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vrsub.vx v8, v12, a1
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1352,12 +1352,12 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVD-LABEL: ctlz_zero_undef_v8i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RVD-NEXT: vle32.v v8, (a0)
+; RVD-NEXT: vle32.v v12, (a0)
; RVD-NEXT: li a1, 52
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wx v8, v12, a1
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wx v12, v8, a1
; RVD-NEXT: li a1, 1054
-; RVD-NEXT: vrsub.vx v8, v8, a1
+; RVD-NEXT: vrsub.vx v8, v12, a1
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1489,9 +1489,9 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vfncvt.f.xu.w v11, v8
; RVF-NEXT: fsrm a1
-; RVF-NEXT: vsrl.vi v8, v11, 23
-; RVF-NEXT: vwsubu.vv v12, v10, v8
-; RVF-NEXT: vse64.v v12, (a0)
+; RVF-NEXT: vsrl.vi v11, v11, 23
+; RVF-NEXT: vwsubu.vv v8, v10, v11
+; RVF-NEXT: vse64.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: ctlz_zero_undef_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 1724b92a9ab48..d884cece89507 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -34,18 +34,18 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVF-LABEL: cttz_v16i8:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RVF-NEXT: vle8.v v8, (a0)
+; RVF-NEXT: vle8.v v14, (a0)
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vrsub.vi v9, v8, 0
-; RVF-NEXT: vand.vv v9, v8, v9
+; RVF-NEXT: vrsub.vi v8, v14, 0
+; RVF-NEXT: vand.vv v8, v14, v8
; RVF-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RVF-NEXT: vzext.vf2 v10, v9
-; RVF-NEXT: vfwcvt.f.xu.v v12, v10
-; RVF-NEXT: vnsrl.wi v10, v12, 23
+; RVF-NEXT: vzext.vf2 v12, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT: vnsrl.wi v9, v10, 0
-; RVF-NEXT: vmseq.vi v0, v8, 0
-; RVF-NEXT: vsub.vx v8, v9, a1
+; RVF-NEXT: vnsrl.wi v8, v12, 0
+; RVF-NEXT: vmseq.vi v0, v14, 0
+; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vmerge.vim v8, v8, 8, v0
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
@@ -53,18 +53,18 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVD-LABEL: cttz_v16i8:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RVD-NEXT: vle8.v v8, (a0)
+; RVD-NEXT: vle8.v v14, (a0)
; RVD-NEXT: li a1, 127
-; RVD-NEXT: vrsub.vi v9, v8, 0
-; RVD-NEXT: vand.vv v9, v8, v9
+; RVD-NEXT: vrsub.vi v8, v14, 0
+; RVD-NEXT: vand.vv v8, v14, v8
; RVD-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RVD-NEXT: vzext.vf2 v10, v9
-; RVD-NEXT: vfwcvt.f.xu.v v12, v10
-; RVD-NEXT: vnsrl.wi v10, v12, 23
+; RVD-NEXT: vzext.vf2 v12, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT: vnsrl.wi v9, v10, 0
-; RVD-NEXT: vmseq.vi v0, v8, 0
-; RVD-NEXT: vsub.vx v8, v9, a1
+; RVD-NEXT: vnsrl.wi v8, v12, 0
+; RVD-NEXT: vmseq.vi v0, v14, 0
+; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vmerge.vim v8, v8, 8, v0
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
@@ -121,10 +121,10 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; RVF-NEXT: li a1, 127
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vmseq.vi v0, v8, 0
-; RVF-NEXT: vand.vv v8, v8, v9
-; RVF-NEXT: vfwcvt.f.xu.v v10, v8
-; RVF-NEXT: vnsrl.wi v8, v10, 23
-; RVF-NEXT: vsub.vx v8, v8, a1
+; RVF-NEXT: vand.vv v10, v8, v9
+; RVF-NEXT: vfwcvt.f.xu.v v8, v10
+; RVF-NEXT: vnsrl.wi v10, v8, 23
+; RVF-NEXT: vsub.vx v8, v10, a1
; RVF-NEXT: li a1, 16
; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse16.v v8, (a0)
@@ -137,10 +137,10 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; RVD-NEXT: li a1, 127
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vmseq.vi v0, v8, 0
-; RVD-NEXT: vand.vv v8, v8, v9
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wi v8, v10, 23
-; RVD-NEXT: vsub.vx v8, v8, a1
+; RVD-NEXT: vand.vv v10, v8, v9
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wi v10, v8, 23
+; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: li a1, 16
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse16.v v8, (a0)
@@ -213,15 +213,15 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; RVD-LABEL: cttz_v4i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RVD-NEXT: vle32.v v8, (a0)
+; RVD-NEXT: vle32.v v10, (a0)
; RVD-NEXT: li a1, 52
-; RVD-NEXT: vrsub.vi v9, v8, 0
-; RVD-NEXT: vand.vv v9, v8, v9
-; RVD-NEXT: vfwcvt.f.xu.v v10, v9
-; RVD-NEXT: vnsrl.wx v9, v10, a1
+; RVD-NEXT: vrsub.vi v8, v10, 0
+; RVD-NEXT: vand.vv v11, v10, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v11
+; RVD-NEXT: vnsrl.wx v11, v8, a1
; RVD-NEXT: li a1, 1023
-; RVD-NEXT: vmseq.vi v0, v8, 0
-; RVD-NEXT: vsub.vx v8, v9, a1
+; RVD-NEXT: vmseq.vi v0, v10, 0
+; RVD-NEXT: vsub.vx v8, v11, a1
; RVD-NEXT: li a1, 32
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse32.v v8, (a0)
@@ -409,18 +409,18 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVF: # %bb.0:
; RVF-NEXT: li a1, 32
; RVF-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RVF-NEXT: vle8.v v8, (a0)
+; RVF-NEXT: vle8.v v20, (a0)
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vrsub.vi v10, v8, 0
-; RVF-NEXT: vand.vv v10, v8, v10
+; RVF-NEXT: vrsub.vi v8, v20, 0
+; RVF-NEXT: vand.vv v8, v20, v8
; RVF-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RVF-NEXT: vzext.vf2 v12, v10
-; RVF-NEXT: vfwcvt.f.xu.v v16, v12
-; RVF-NEXT: vnsrl.wi v12, v16, 23
+; RVF-NEXT: vzext.vf2 v16, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v16
+; RVF-NEXT: vnsrl.wi v16, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT: vnsrl.wi v10, v12, 0
-; RVF-NEXT: vmseq.vi v0, v8, 0
-; RVF-NEXT: vsub.vx v8, v10, a1
+; RVF-NEXT: vnsrl.wi v8, v16, 0
+; RVF-NEXT: vmseq.vi v0, v20, 0
+; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vmerge.vim v8, v8, 8, v0
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
@@ -429,18 +429,18 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVD: # %bb.0:
; RVD-NEXT: li a1, 32
; RVD-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RVD-NEXT: vle8.v v8, (a0)
+; RVD-NEXT: vle8.v v20, (a0)
; RVD-NEXT: li a1, 127
-; RVD-NEXT: vrsub.vi v10, v8, 0
-; RVD-NEXT: vand.vv v10, v8, v10
+; RVD-NEXT: vrsub.vi v8, v20, 0
+; RVD-NEXT: vand.vv v8, v20, v8
; RVD-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RVD-NEXT: vzext.vf2 v12, v10
-; RVD-NEXT: vfwcvt.f.xu.v v16, v12
-; RVD-NEXT: vnsrl.wi v12, v16, 23
+; RVD-NEXT: vzext.vf2 v16, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v16
+; RVD-NEXT: vnsrl.wi v16, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT: vnsrl.wi v10, v12, 0
-; RVD-NEXT: vmseq.vi v0, v8, 0
-; RVD-NEXT: vsub.vx v8, v10, a1
+; RVD-NEXT: vnsrl.wi v8, v16, 0
+; RVD-NEXT: vmseq.vi v0, v20, 0
+; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vmerge.vim v8, v8, 8, v0
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
@@ -498,10 +498,10 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
; RVF-NEXT: li a1, 127
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vmseq.vi v0, v8, 0
-; RVF-NEXT: vand.vv v8, v8, v10
-; RVF-NEXT: vfwcvt.f.xu.v v12, v8
-; RVF-NEXT: vnsrl.wi v8, v12, 23
-; RVF-NEXT: vsub.vx v8, v8, a1
+; RVF-NEXT: vand.vv v12, v8, v10
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
+; RVF-NEXT: vsub.vx v8, v12, a1
; RVF-NEXT: li a1, 16
; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse16.v v8, (a0)
@@ -514,10 +514,10 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
; RVD-NEXT: li a1, 127
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vmseq.vi v0, v8, 0
-; RVD-NEXT: vand.vv v8, v8, v10
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wi v8, v12, 23
-; RVD-NEXT: vsub.vx v8, v8, a1
+; RVD-NEXT: vand.vv v12, v8, v10
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
+; RVD-NEXT: vsub.vx v8, v12, a1
; RVD-NEXT: li a1, 16
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse16.v v8, (a0)
@@ -590,15 +590,15 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; RVD-LABEL: cttz_v8i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RVD-NEXT: vle32.v v8, (a0)
+; RVD-NEXT: vle32.v v12, (a0)
; RVD-NEXT: li a1, 52
-; RVD-NEXT: vrsub.vi v10, v8, 0
-; RVD-NEXT: vand.vv v10, v8, v10
-; RVD-NEXT: vfwcvt.f.xu.v v12, v10
-; RVD-NEXT: vnsrl.wx v10, v12, a1
+; RVD-NEXT: vrsub.vi v8, v12, 0
+; RVD-NEXT: vand.vv v14, v12, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v14
+; RVD-NEXT: vnsrl.wx v14, v8, a1
; RVD-NEXT: li a1, 1023
-; RVD-NEXT: vmseq.vi v0, v8, 0
-; RVD-NEXT: vsub.vx v8, v10, a1
+; RVD-NEXT: vmseq.vi v0, v12, 0
+; RVD-NEXT: vsub.vx v8, v14, a1
; RVD-NEXT: li a1, 32
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse32.v v8, (a0)
@@ -716,11 +716,11 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RVF-NEXT: vsrl.vi v8, v12, 23
-; RVF-NEXT: vwsubu.vx v10, v8, a1
+; RVF-NEXT: vsrl.vi v10, v12, 23
+; RVF-NEXT: vwsubu.vx v8, v10, a1
; RVF-NEXT: li a1, 64
; RVF-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RVF-NEXT: vmerge.vxm v8, v10, a1, v0
+; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse64.v v8, (a0)
; RVF-NEXT: ret
;
@@ -788,13 +788,13 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v8, v8, v9
; RVF-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RVF-NEXT: vzext.vf2 v10, v8
-; RVF-NEXT: vfwcvt.f.xu.v v12, v10
-; RVF-NEXT: vnsrl.wi v8, v12, 23
+; RVF-NEXT: vzext.vf2 v12, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT: vnsrl.wi v10, v8, 0
+; RVF-NEXT: vnsrl.wi v8, v12, 0
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vsub.vx v8, v10, a1
+; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
@@ -805,13 +805,13 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v8, v8, v9
; RVD-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RVD-NEXT: vzext.vf2 v10, v8
-; RVD-NEXT: vfwcvt.f.xu.v v12, v10
-; RVD-NEXT: vnsrl.wi v8, v12, 23
+; RVD-NEXT: vzext.vf2 v12, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT: vnsrl.wi v10, v8, 0
+; RVD-NEXT: vnsrl.wi v8, v12, 0
; RVD-NEXT: li a1, 127
-; RVD-NEXT: vsub.vx v8, v10, a1
+; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
@@ -864,11 +864,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT: vle16.v v8, (a0)
; RVF-NEXT: vrsub.vi v9, v8, 0
-; RVF-NEXT: vand.vv v8, v8, v9
-; RVF-NEXT: vfwcvt.f.xu.v v10, v8
-; RVF-NEXT: vnsrl.wi v8, v10, 23
+; RVF-NEXT: vand.vv v10, v8, v9
+; RVF-NEXT: vfwcvt.f.xu.v v8, v10
+; RVF-NEXT: vnsrl.wi v10, v8, 23
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vsub.vx v8, v8, a1
+; RVF-NEXT: vsub.vx v8, v10, a1
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
@@ -877,11 +877,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT: vle16.v v8, (a0)
; RVD-NEXT: vrsub.vi v9, v8, 0
-; RVD-NEXT: vand.vv v8, v8, v9
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wi v8, v10, 23
+; RVD-NEXT: vand.vv v10, v8, v9
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wi v10, v8, 23
; RVD-NEXT: li a1, 127
-; RVD-NEXT: vsub.vx v8, v8, a1
+; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
@@ -951,11 +951,11 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vle32.v v8, (a0)
; RVD-NEXT: li a1, 52
; RVD-NEXT: vrsub.vi v9, v8, 0
-; RVD-NEXT: vand.vv v8, v8, v9
-; RVD-NEXT: vfwcvt.f.xu.v v10, v8
-; RVD-NEXT: vnsrl.wx v8, v10, a1
+; RVD-NEXT: vand.vv v10, v8, v9
+; RVD-NEXT: vfwcvt.f.xu.v v8, v10
+; RVD-NEXT: vnsrl.wx v10, v8, a1
; RVD-NEXT: li a1, 1023
-; RVD-NEXT: vsub.vx v8, v8, a1
+; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1134,13 +1134,13 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v8, v8, v10
; RVF-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RVF-NEXT: vzext.vf2 v12, v8
-; RVF-NEXT: vfwcvt.f.xu.v v16, v12
-; RVF-NEXT: vnsrl.wi v8, v16, 23
+; RVF-NEXT: vzext.vf2 v16, v8
+; RVF-NEXT: vfwcvt.f.xu.v v8, v16
+; RVF-NEXT: vnsrl.wi v16, v8, 23
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT: vnsrl.wi v12, v8, 0
+; RVF-NEXT: vnsrl.wi v8, v16, 0
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vsub.vx v8, v12, a1
+; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
@@ -1152,13 +1152,13 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v8, v8, v10
; RVD-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RVD-NEXT: vzext.vf2 v12, v8
-; RVD-NEXT: vfwcvt.f.xu.v v16, v12
-; RVD-NEXT: vnsrl.wi v8, v16, 23
+; RVD-NEXT: vzext.vf2 v16, v8
+; RVD-NEXT: vfwcvt.f.xu.v v8, v16
+; RVD-NEXT: vnsrl.wi v16, v8, 23
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT: vnsrl.wi v12, v8, 0
+; RVD-NEXT: vnsrl.wi v8, v16, 0
; RVD-NEXT: li a1, 127
-; RVD-NEXT: vsub.vx v8, v12, a1
+; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1212,11 +1212,11 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT: vle16.v v8, (a0)
; RVF-NEXT: vrsub.vi v10, v8, 0
-; RVF-NEXT: vand.vv v8, v8, v10
-; RVF-NEXT: vfwcvt.f.xu.v v12, v8
-; RVF-NEXT: vnsrl.wi v8, v12, 23
+; RVF-NEXT: vand.vv v12, v8, v10
+; RVF-NEXT: vfwcvt.f.xu.v v8, v12
+; RVF-NEXT: vnsrl.wi v12, v8, 23
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vsub.vx v8, v8, a1
+; RVF-NEXT: vsub.vx v8, v12, a1
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
@@ -1225,11 +1225,11 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT: vle16.v v8, (a0)
; RVD-NEXT: vrsub.vi v10, v8, 0
-; RVD-NEXT: vand.vv v8, v8, v10
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wi v8, v12, 23
+; RVD-NEXT: vand.vv v12, v8, v10
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wi v12, v8, 23
; RVD-NEXT: li a1, 127
-; RVD-NEXT: vsub.vx v8, v8, a1
+; RVD-NEXT: vsub.vx v8, v12, a1
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1299,11 +1299,11 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVD-NEXT: vle32.v v8, (a0)
; RVD-NEXT: li a1, 52
; RVD-NEXT: vrsub.vi v10, v8, 0
-; RVD-NEXT: vand.vv v8, v8, v10
-; RVD-NEXT: vfwcvt.f.xu.v v12, v8
-; RVD-NEXT: vnsrl.wx v8, v12, a1
+; RVD-NEXT: vand.vv v12, v8, v10
+; RVD-NEXT: vfwcvt.f.xu.v v8, v12
+; RVD-NEXT: vnsrl.wx v12, v8, a1
; RVD-NEXT: li a1, 1023
-; RVD-NEXT: vsub.vx v8, v8, a1
+; RVD-NEXT: vsub.vx v8, v12, a1
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
@@ -1414,10 +1414,10 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RVF-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT: vfncvt.f.xu.w v10, v8
; RVF-NEXT: fsrm a1
-; RVF-NEXT: vsrl.vi v8, v10, 23
+; RVF-NEXT: vsrl.vi v10, v10, 23
; RVF-NEXT: li a1, 127
-; RVF-NEXT: vwsubu.vx v10, v8, a1
-; RVF-NEXT: vse64.v v10, (a0)
+; RVF-NEXT: vwsubu.vx v8, v10, a1
+; RVF-NEXT: vse64.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
index ff52f5d2039e1..78ad10033feaa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -36,9 +36,9 @@ define void @fpext_v8f16_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: fpext_v8f16_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
+; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x half>, ptr %x
%d = fpext <8 x half> %a to <8 x float>
@@ -51,10 +51,10 @@ define void @fpext_v8f16_v8f64(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v10
-; CHECK-NEXT: vse64.v v12, (a1)
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
+; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x half>, ptr %x
%d = fpext <8 x half> %a to <8 x double>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index 7817f010c4deb..9102cd6ed7036 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -90,10 +90,11 @@ define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
; V128-LABEL: interleave_v4f32:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; V128-NEXT: vwaddu.vv v10, v8, v9
+; V128-NEXT: vmv1r.v v10, v9
+; V128-NEXT: vmv1r.v v11, v8
+; V128-NEXT: vwaddu.vv v8, v11, v10
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v10, a0, v9
-; V128-NEXT: vmv2r.v v8, v10
+; V128-NEXT: vwmaccu.vx v8, a0, v10
; V128-NEXT: ret
;
; V512-LABEL: interleave_v4f32:
@@ -113,10 +114,11 @@ define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
; V128-LABEL: interleave_v8f16:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; V128-NEXT: vwaddu.vv v10, v9, v8
+; V128-NEXT: vmv1r.v v10, v9
+; V128-NEXT: vmv1r.v v11, v8
+; V128-NEXT: vwaddu.vv v8, v10, v11
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v10, a0, v8
-; V128-NEXT: vmv2r.v v8, v10
+; V128-NEXT: vwmaccu.vx v8, a0, v11
; V128-NEXT: ret
;
; V512-LABEL: interleave_v8f16:
@@ -135,10 +137,11 @@ define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
; V128-LABEL: interleave_v8f32:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; V128-NEXT: vwaddu.vv v12, v8, v10
+; V128-NEXT: vmv2r.v v12, v10
+; V128-NEXT: vmv2r.v v14, v8
+; V128-NEXT: vwaddu.vv v8, v14, v12
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v12, a0, v10
-; V128-NEXT: vmv4r.v v8, v12
+; V128-NEXT: vwmaccu.vx v8, a0, v12
; V128-NEXT: ret
;
; V512-LABEL: interleave_v8f32:
@@ -157,10 +160,11 @@ define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
; V128-LABEL: interleave_v16f16:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; V128-NEXT: vwaddu.vv v12, v8, v10
+; V128-NEXT: vmv2r.v v12, v10
+; V128-NEXT: vmv2r.v v14, v8
+; V128-NEXT: vwaddu.vv v8, v14, v12
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v12, a0, v10
-; V128-NEXT: vmv4r.v v8, v12
+; V128-NEXT: vwmaccu.vx v8, a0, v12
; V128-NEXT: ret
;
; V512-LABEL: interleave_v16f16:
@@ -179,19 +183,21 @@ define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
; V128-LABEL: interleave_v16f32:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; V128-NEXT: vwaddu.vv v16, v8, v12
+; V128-NEXT: vmv4r.v v16, v12
+; V128-NEXT: vmv4r.v v20, v8
+; V128-NEXT: vwaddu.vv v8, v20, v16
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v16, a0, v12
-; V128-NEXT: vmv8r.v v8, v16
+; V128-NEXT: vwmaccu.vx v8, a0, v16
; V128-NEXT: ret
;
; V512-LABEL: interleave_v16f32:
; V512: # %bb.0:
; V512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
-; V512-NEXT: vwaddu.vv v10, v8, v9
+; V512-NEXT: vmv1r.v v10, v9
+; V512-NEXT: vmv1r.v v11, v8
+; V512-NEXT: vwaddu.vv v8, v11, v10
; V512-NEXT: li a0, -1
-; V512-NEXT: vwmaccu.vx v10, a0, v9
-; V512-NEXT: vmv2r.v v8, v10
+; V512-NEXT: vwmaccu.vx v8, a0, v10
; V512-NEXT: ret
%a = shufflevector <16 x float> %x, <16 x float> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x float> %a
@@ -200,22 +206,26 @@ define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
; V128-LABEL: interleave_v32f16:
; V128: # %bb.0:
+; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V128-NEXT: vmv4r.v v16, v12
+; V128-NEXT: vmv4r.v v20, v8
; V128-NEXT: li a0, 32
; V128-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; V128-NEXT: vwaddu.vv v16, v8, v12
+; V128-NEXT: vwaddu.vv v8, v20, v16
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v16, a0, v12
-; V128-NEXT: vmv8r.v v8, v16
+; V128-NEXT: vwmaccu.vx v8, a0, v16
; V128-NEXT: ret
;
; V512-LABEL: interleave_v32f16:
; V512: # %bb.0:
+; V512-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V512-NEXT: vmv1r.v v10, v9
+; V512-NEXT: vmv1r.v v11, v8
; V512-NEXT: li a0, 32
; V512-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; V512-NEXT: vwaddu.vv v10, v8, v9
+; V512-NEXT: vwaddu.vv v8, v11, v10
; V512-NEXT: li a0, -1
-; V512-NEXT: vwmaccu.vx v10, a0, v9
-; V512-NEXT: vmv2r.v v8, v10
+; V512-NEXT: vwmaccu.vx v8, a0, v10
; V512-NEXT: ret
%a = shufflevector <32 x half> %x, <32 x half> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x half> %a
@@ -263,12 +273,14 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
;
; V512-LABEL: interleave_v32f32:
; V512: # %bb.0:
+; V512-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V512-NEXT: vmv2r.v v12, v10
+; V512-NEXT: vmv2r.v v14, v8
; V512-NEXT: li a0, 32
; V512-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; V512-NEXT: vwaddu.vv v12, v8, v10
+; V512-NEXT: vwaddu.vv v8, v14, v12
; V512-NEXT: li a0, -1
-; V512-NEXT: vwmaccu.vx v12, a0, v10
-; V512-NEXT: vmv4r.v v8, v12
+; V512-NEXT: vwmaccu.vx v8, a0, v12
; V512-NEXT: ret
%a = shufflevector <32 x float> %x, <32 x float> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x float> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index c14eae0b1de61..abb929eaaf6e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -17,13 +17,13 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v8
+; ZVFHMIN-NEXT: vsm.v v12, (a2)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -45,13 +45,13 @@ define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16_nonans:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v8
+; ZVFHMIN-NEXT: vsm.v v12, (a2)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -173,13 +173,13 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: vmflt.vv v16, v12, v8
+; ZVFHMIN-NEXT: vsm.v v16, (a2)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = load <16 x half>, ptr %y
@@ -201,13 +201,13 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: vmflt.vv v16, v12, v8
+; ZVFHMIN-NEXT: vsm.v v16, (a2)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = load <16 x half>, ptr %y
@@ -314,13 +314,13 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: li a3, 32
; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle16.v v12, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vle16.v v24, (a1)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16
-; ZVFHMIN-NEXT: vmnot.m v8, v8
+; ZVFHMIN-NEXT: vmflt.vv v24, v16, v8
+; ZVFHMIN-NEXT: vmnot.m v8, v24
; ZVFHMIN-NEXT: vsm.v v8, (a2)
; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
@@ -345,13 +345,13 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: li a3, 32
; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v12, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vle16.v v16, (a1)
+; ZVFHMIN-NEXT: vle16.v v24, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16
-; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: vmfle.vv v24, v16, v8
+; ZVFHMIN-NEXT: vsm.v v24, (a2)
; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
%b = load <32 x half>, ptr %y
@@ -643,14 +643,14 @@ define void @fcmp_oeq_vf_v8f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_oeq_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmfeq.vv v12, v8, v10
+; ZVFHMIN-NEXT: vsm.v v12, (a1)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = insertelement <8 x half> poison, half %y, i32 0
@@ -672,14 +672,14 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_oeq_vf_v8f16_nonans:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmfeq.vv v12, v8, v10
+; ZVFHMIN-NEXT: vsm.v v12, (a1)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = insertelement <8 x half> poison, half %y, i32 0
@@ -801,14 +801,14 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmflt.vv v16, v8, v12
+; ZVFHMIN-NEXT: vsm.v v16, (a1)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = insertelement <16 x half> poison, half %y, i32 0
@@ -830,14 +830,14 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16_nonans:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmflt.vv v16, v8, v12
+; ZVFHMIN-NEXT: vsm.v v16, (a1)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = insertelement <16 x half> poison, half %y, i32 0
@@ -944,14 +944,14 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16
-; ZVFHMIN-NEXT: vmnot.m v8, v8
+; ZVFHMIN-NEXT: vmflt.vv v24, v16, v8
+; ZVFHMIN-NEXT: vmnot.m v8, v24
; ZVFHMIN-NEXT: vsm.v v8, (a1)
; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
@@ -976,14 +976,14 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v16, v24
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmfle.vv v24, v8, v16
+; ZVFHMIN-NEXT: vsm.v v24, (a1)
; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
%b = insertelement <32 x half> poison, half %y, i32 0
@@ -1284,14 +1284,14 @@ define void @fcmp_oeq_fv_v8f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_oeq_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v8
+; ZVFHMIN-NEXT: vsm.v v12, (a1)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = insertelement <8 x half> poison, half %y, i32 0
@@ -1313,14 +1313,14 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_oeq_fv_v8f16_nonans:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v8
+; ZVFHMIN-NEXT: vsm.v v12, (a1)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = insertelement <8 x half> poison, half %y, i32 0
@@ -1442,14 +1442,14 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmflt.vv v16, v12, v8
+; ZVFHMIN-NEXT: vsm.v v16, (a1)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = insertelement <16 x half> poison, half %y, i32 0
@@ -1471,14 +1471,14 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16_nonans:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmflt.vv v16, v12, v8
+; ZVFHMIN-NEXT: vsm.v v16, (a1)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = insertelement <16 x half> poison, half %y, i32 0
@@ -1585,14 +1585,14 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v16, v24
-; ZVFHMIN-NEXT: vmnot.m v8, v8
+; ZVFHMIN-NEXT: vmflt.vv v24, v8, v16
+; ZVFHMIN-NEXT: vmnot.m v8, v24
; ZVFHMIN-NEXT: vsm.v v8, (a1)
; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
@@ -1617,14 +1617,14 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: li a2, 32
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16
-; ZVFHMIN-NEXT: vsm.v v8, (a1)
+; ZVFHMIN-NEXT: vmfle.vv v24, v16, v8
+; ZVFHMIN-NEXT: vsm.v v24, (a1)
; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
%b = insertelement <32 x half> poison, half %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index a749736097331..4812b27f442c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -381,15 +381,15 @@ define <16 x float> @shuffle_disjoint_lanes(<16 x float> %v, <16 x float> %w) {
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v18, (a0)
; CHECK-NEXT: lui a0, 11
; CHECK-NEXT: addi a0, a0, -1366
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v18, v16
+; CHECK-NEXT: vsext.vf2 v16, v18
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v8, v12, v18
+; CHECK-NEXT: vrgatherei16.vv v8, v12, v16
; CHECK-NEXT: ret
%out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
ret <16 x float> %out
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index c885b3c03270c..38df622998bf9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -9,12 +9,12 @@ define void @fadd_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v12, v10
+; CHECK-NEXT: vfadd.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -30,12 +30,12 @@ define void @fadd_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v12, v10
+; CHECK-NEXT: vfadd.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -60,12 +60,12 @@ define void @fadd_v8f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fadd_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -90,12 +90,12 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fadd_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -143,12 +143,12 @@ define void @fsub_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v12, v10
+; CHECK-NEXT: vfsub.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -164,12 +164,12 @@ define void @fsub_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v12, v10
+; CHECK-NEXT: vfsub.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -194,12 +194,12 @@ define void @fsub_v8f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fsub_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfsub.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -224,12 +224,12 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fsub_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfsub.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -277,12 +277,12 @@ define void @fmul_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v12, v10
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -298,12 +298,12 @@ define void @fmul_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v12, v10
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -328,12 +328,12 @@ define void @fmul_v8f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fmul_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -358,12 +358,12 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fmul_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -411,12 +411,12 @@ define void @fdiv_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v12, v10
+; CHECK-NEXT: vfdiv.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -432,12 +432,12 @@ define void @fdiv_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v12, v10
+; CHECK-NEXT: vfdiv.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -462,12 +462,12 @@ define void @fdiv_v8f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fdiv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -492,12 +492,12 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fdiv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -1299,10 +1299,10 @@ define void @sqrt_v8bf16(ptr %x) {
; CHECK-LABEL: sqrt_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsqrt.v v8, v10
+; CHECK-NEXT: vfsqrt.v v8, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -1317,10 +1317,10 @@ define void @sqrt_v6bf16(ptr %x) {
; CHECK-LABEL: sqrt_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsqrt.v v8, v10
+; CHECK-NEXT: vfsqrt.v v8, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -1343,10 +1343,10 @@ define void @sqrt_v8f16(ptr %x) {
; ZVFHMIN-LABEL: sqrt_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsqrt.v v8, v10
+; ZVFHMIN-NEXT: vfsqrt.v v8, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -1369,10 +1369,10 @@ define void @sqrt_v6f16(ptr %x) {
; ZVFHMIN-LABEL: sqrt_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsqrt.v v8, v10
+; ZVFHMIN-NEXT: vfsqrt.v v8, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -1415,17 +1415,17 @@ define void @fma_v8bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fma_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a2)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a1)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v14, v12
+; CHECK-NEXT: vfmadd.vv v12, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = load <8 x bfloat>, ptr %y
@@ -1439,17 +1439,17 @@ define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fma_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a2)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a1)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v14, v12
+; CHECK-NEXT: vfmadd.vv v12, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = load <6 x bfloat>, ptr %y
@@ -1473,17 +1473,17 @@ define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fma_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a2)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vle16.v v14, (a1)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -1507,17 +1507,17 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fma_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a2)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vle16.v v14, (a1)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
@@ -1568,18 +1568,18 @@ define void @fmsub_v8bf16(ptr %x, ptr %y, ptr %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a2)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
; CHECK-NEXT: lui a1, 8
-; CHECK-NEXT: vxor.vx v8, v8, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vxor.vx v12, v8, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v12, v14
+; CHECK-NEXT: vfmadd.vv v12, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = load <8 x bfloat>, ptr %y
@@ -1595,18 +1595,18 @@ define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a2)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
; CHECK-NEXT: lui a1, 8
-; CHECK-NEXT: vxor.vx v8, v8, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vxor.vx v12, v8, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v12, v14
+; CHECK-NEXT: vfmadd.vv v12, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = load <6 x bfloat>, ptr %y
@@ -1632,18 +1632,18 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
+; ZVFHMIN-NEXT: vle16.v v14, (a1)
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -1669,18 +1669,18 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
+; ZVFHMIN-NEXT: vle16.v v14, (a1)
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
@@ -1734,12 +1734,12 @@ define void @fadd_v16bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vle16.v v12, (a1)
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v16, v12
+; CHECK-NEXT: vfadd.vv v8, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
; CHECK-NEXT: vse16.v v12, (a0)
@@ -1764,12 +1764,12 @@ define void @fadd_v16f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fadd_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v16, v12
+; ZVFHMIN-NEXT: vfadd.vv v8, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
; ZVFHMIN-NEXT: vse16.v v12, (a0)
@@ -1817,12 +1817,12 @@ define void @fsub_v16bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vle16.v v12, (a1)
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v16, v12
+; CHECK-NEXT: vfsub.vv v8, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
; CHECK-NEXT: vse16.v v12, (a0)
@@ -1847,12 +1847,12 @@ define void @fsub_v16f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fsub_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v16, v12
+; ZVFHMIN-NEXT: vfsub.vv v8, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
; ZVFHMIN-NEXT: vse16.v v12, (a0)
@@ -1900,12 +1900,12 @@ define void @fmul_v16bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vle16.v v12, (a1)
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v16, v12
+; CHECK-NEXT: vfmul.vv v8, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
; CHECK-NEXT: vse16.v v12, (a0)
@@ -1930,12 +1930,12 @@ define void @fmul_v16f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fmul_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v16, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
; ZVFHMIN-NEXT: vse16.v v12, (a0)
@@ -1983,12 +1983,12 @@ define void @fdiv_v16bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vle16.v v12, (a1)
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v16, v12
+; CHECK-NEXT: vfdiv.vv v8, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
; CHECK-NEXT: vse16.v v12, (a0)
@@ -2013,12 +2013,12 @@ define void @fdiv_v16f16(ptr %x, ptr %y) {
; ZVFHMIN-LABEL: fdiv_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v16, v12
+; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
; ZVFHMIN-NEXT: vse16.v v12, (a0)
@@ -2132,17 +2132,17 @@ define void @fma_v16bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fma_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a2)
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vle16.v v12, (a1)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vle16.v v12, (a2)
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v20, (a1)
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v20
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v20, v16
+; CHECK-NEXT: vfmadd.vv v16, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT: vse16.v v12, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x bfloat>, ptr %x
%b = load <16 x bfloat>, ptr %y
@@ -2166,17 +2166,17 @@ define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fma_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-NEXT: vle16.v v10, (a0)
-; ZVFHMIN-NEXT: vle16.v v12, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT: vle16.v v12, (a2)
+; ZVFHMIN-NEXT: vle16.v v16, (a0)
+; ZVFHMIN-NEXT: vle16.v v20, (a1)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT: vse16.v v12, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = load <16 x half>, ptr %y
@@ -2226,13 +2226,13 @@ define void @fadd_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fadd_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v10, v12
+; CHECK-NEXT: vfadd.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2249,13 +2249,13 @@ define void @fadd_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fadd_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v10, v12
+; CHECK-NEXT: vfadd.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2280,13 +2280,13 @@ define void @fadd_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fadd_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2311,13 +2311,13 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fadd_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2366,13 +2366,13 @@ define void @fadd_fv_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fadd_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v12, v10
+; CHECK-NEXT: vfadd.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2389,13 +2389,13 @@ define void @fadd_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fadd_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v12, v10
+; CHECK-NEXT: vfadd.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2420,13 +2420,13 @@ define void @fadd_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fadd_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2451,13 +2451,13 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fadd_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2506,13 +2506,13 @@ define void @fsub_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fsub_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v10, v12
+; CHECK-NEXT: vfsub.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2529,13 +2529,13 @@ define void @fsub_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fsub_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v10, v12
+; CHECK-NEXT: vfsub.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2560,13 +2560,13 @@ define void @fsub_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fsub_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2591,13 +2591,13 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fsub_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2646,13 +2646,13 @@ define void @fsub_fv_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fsub_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v12, v10
+; CHECK-NEXT: vfsub.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2669,13 +2669,13 @@ define void @fsub_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fsub_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v12, v10
+; CHECK-NEXT: vfsub.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2700,13 +2700,13 @@ define void @fsub_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fsub_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfsub.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2731,13 +2731,13 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fsub_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfsub.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2786,13 +2786,13 @@ define void @fmul_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v10, v12
+; CHECK-NEXT: vfmul.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2809,13 +2809,13 @@ define void @fmul_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v10, v12
+; CHECK-NEXT: vfmul.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2840,13 +2840,13 @@ define void @fmul_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fmul_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2871,13 +2871,13 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fmul_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -2926,13 +2926,13 @@ define void @fmul_fv_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v12, v10
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2949,13 +2949,13 @@ define void @fmul_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v12, v10
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -2980,13 +2980,13 @@ define void @fmul_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fmul_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -3011,13 +3011,13 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fmul_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -3066,13 +3066,13 @@ define void @fdiv_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v10, v12
+; CHECK-NEXT: vfdiv.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -3089,13 +3089,13 @@ define void @fdiv_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v10, v12
+; CHECK-NEXT: vfdiv.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -3120,13 +3120,13 @@ define void @fdiv_vf_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fdiv_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -3151,13 +3151,13 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fdiv_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
+; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -3206,13 +3206,13 @@ define void @fdiv_fv_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v12, v10
+; CHECK-NEXT: vfdiv.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -3229,13 +3229,13 @@ define void @fdiv_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v8, v12, v10
+; CHECK-NEXT: vfdiv.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -3260,13 +3260,13 @@ define void @fdiv_fv_v8f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fdiv_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -3291,13 +3291,13 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-LABEL: fdiv_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
+; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -3346,18 +3346,18 @@ define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vmv.v.x v14, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v14, v12
+; CHECK-NEXT: vfmadd.vv v12, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = load <8 x bfloat>, ptr %y
@@ -3372,18 +3372,18 @@ define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vmv.v.x v14, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v14, v12
+; CHECK-NEXT: vfmadd.vv v12, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = load <6 x bfloat>, ptr %y
@@ -3407,18 +3407,18 @@ define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fma_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -3442,18 +3442,18 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fma_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
@@ -3504,18 +3504,18 @@ define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vmv.v.x v14, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v14, v12
+; CHECK-NEXT: vfmadd.vv v12, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = load <8 x bfloat>, ptr %y
@@ -3530,18 +3530,18 @@ define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vmv.v.x v14, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v14, v12
+; CHECK-NEXT: vfmadd.vv v12, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = load <6 x bfloat>, ptr %y
@@ -3565,18 +3565,18 @@ define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fma_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -3600,18 +3600,18 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-LABEL: fma_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
@@ -3664,18 +3664,18 @@ define void @fmsub_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-NEXT: fmv.x.w a2, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 8
-; CHECK-NEXT: vmv.v.x v10, a2
-; CHECK-NEXT: vxor.vx v8, v8, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vmv.v.x v14, a2
+; CHECK-NEXT: vxor.vx v12, v8, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v12, v14
+; CHECK-NEXT: vfmadd.vv v12, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = load <8 x bfloat>, ptr %y
@@ -3693,18 +3693,18 @@ define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-NEXT: fmv.x.w a2, fa0
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 8
-; CHECK-NEXT: vmv.v.x v10, a2
-; CHECK-NEXT: vxor.vx v8, v8, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vmv.v.x v14, a2
+; CHECK-NEXT: vxor.vx v12, v8, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v12, v14
+; CHECK-NEXT: vfmadd.vv v12, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = load <6 x bfloat>, ptr %y
@@ -3731,18 +3731,18 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-NEXT: fmv.x.w a2, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
+; ZVFHMIN-NEXT: vmv.v.x v14, a2
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -3769,18 +3769,18 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFHMIN-NEXT: fmv.x.w a2, fa0
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
+; ZVFHMIN-NEXT: vmv.v.x v14, a2
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
@@ -3874,20 +3874,20 @@ define void @trunc_v8bf16(ptr %x) {
; CHECK-LABEL: trunc_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> %a)
@@ -3899,20 +3899,20 @@ define void @trunc_v6bf16(ptr %x) {
; CHECK-LABEL: trunc_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = call <6 x bfloat> @llvm.trunc.v6bf16(<6 x bfloat> %a)
@@ -3939,20 +3939,20 @@ define void @trunc_v8f16(ptr %x) {
; ZVFHMIN-LABEL: trunc_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
@@ -3979,20 +3979,20 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-LABEL: trunc_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
@@ -4046,22 +4046,22 @@ define void @ceil_v8bf16(ptr %x) {
; CHECK-LABEL: ceil_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: fsrmi a1, 3
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> %a)
@@ -4073,22 +4073,22 @@ define void @ceil_v6bf16(ptr %x) {
; CHECK-LABEL: ceil_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: fsrmi a1, 3
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = call <6 x bfloat> @llvm.ceil.v6bf16(<6 x bfloat> %a)
@@ -4117,22 +4117,22 @@ define void @ceil_v8f16(ptr %x) {
; ZVFHMIN-LABEL: ceil_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: fsrmi a1, 3
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
@@ -4161,22 +4161,22 @@ define void @ceil_v6f16(ptr %x) {
; ZVFHMIN-LABEL: ceil_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: fsrmi a1, 3
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
@@ -4234,22 +4234,22 @@ define void @floor_v8bf16(ptr %x) {
; CHECK-LABEL: floor_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: fsrmi a1, 2
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> %a)
@@ -4261,22 +4261,22 @@ define void @floor_v6bf16(ptr %x) {
; CHECK-LABEL: floor_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: fsrmi a1, 2
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = call <6 x bfloat> @llvm.floor.v6bf16(<6 x bfloat> %a)
@@ -4305,22 +4305,22 @@ define void @floor_v8f16(ptr %x) {
; ZVFHMIN-LABEL: floor_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: fsrmi a1, 2
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
@@ -4349,22 +4349,22 @@ define void @floor_v6f16(ptr %x) {
; ZVFHMIN-LABEL: floor_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: fsrmi a1, 2
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
@@ -4422,22 +4422,22 @@ define void @round_v8bf16(ptr %x) {
; CHECK-LABEL: round_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: fsrmi a1, 4
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> %a)
@@ -4449,22 +4449,22 @@ define void @round_v6bf16(ptr %x) {
; CHECK-LABEL: round_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: fsrmi a1, 4
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <6 x bfloat>, ptr %x
%b = call <6 x bfloat> @llvm.round.v6bf16(<6 x bfloat> %a)
@@ -4493,22 +4493,22 @@ define void @round_v8f16(ptr %x) {
; ZVFHMIN-LABEL: round_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: fsrmi a1, 4
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
@@ -4537,22 +4537,22 @@ define void @round_v6f16(ptr %x) {
; ZVFHMIN-LABEL: round_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: fsrmi a1, 4
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
@@ -4610,20 +4610,20 @@ define void @rint_v8bf16(ptr %x) {
; CHECK-LABEL: rint_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> %a)
@@ -4650,20 +4650,20 @@ define void @rint_v8f16(ptr %x) {
; ZVFHMIN-LABEL: rint_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
@@ -4717,22 +4717,22 @@ define void @nearbyint_v8bf16(ptr %x) {
; CHECK-LABEL: nearbyint_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, fa5
; CHECK-NEXT: frflags a1
-; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a1
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
%a = load <8 x bfloat>, ptr %x
%b = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> %a)
@@ -4761,22 +4761,22 @@ define void @nearbyint_v8f16(ptr %x) {
; ZVFHMIN-LABEL: nearbyint_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
; ZVFHMIN-NEXT: frflags a1
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFHMIN-NEXT: fsflags a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
@@ -4834,19 +4834,19 @@ define void @fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fmuladd_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a2)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v13, (a2)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v14, v12
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v13
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v12
+; CHECK-NEXT: vfadd.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -4863,19 +4863,19 @@ define void @fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fmuladd_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a2)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v13, (a2)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v14, v12
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v13
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v12
+; CHECK-NEXT: vfadd.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -4902,19 +4902,19 @@ define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fmuladd_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vle16.v v13, (a2)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v13
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -4941,19 +4941,19 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fmuladd_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vle16.v v13, (a2)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v13
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -5006,19 +5006,19 @@ define void @fmsub_fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fmsub_fmuladd_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a2)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v13, (a2)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v14, v12
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v13
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v8, v12
+; CHECK-NEXT: vfsub.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -5036,19 +5036,19 @@ define void @fmsub_fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fmsub_fmuladd_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vle16.v v10, (a2)
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v13, (a2)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v8, v14, v12
+; CHECK-NEXT: vfmul.vv v8, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v13
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v8, v12
+; CHECK-NEXT: vfsub.vv v8, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
@@ -5076,19 +5076,19 @@ define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vle16.v v13, (a2)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v13
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
@@ -5116,19 +5116,19 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vle16.v v13, (a2)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v13
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index f0e6df6298471..1f9a8bf8133c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -113,11 +113,11 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v8f32_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmfne.vv v0, v8, v8
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vmfne.vv v0, v12, v12
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x float>, ptr %x
@@ -132,11 +132,11 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v8f32_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmfne.vv v0, v8, v8
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vmfne.vv v0, v12, v12
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x float>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index ddcb3c3121bc3..9be93d5209121 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -400,9 +400,9 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v8f32_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vse64.v v12, (a1)
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x float>, ptr %x
%d = fptosi <8 x float> %a to <8 x i64>
@@ -414,9 +414,9 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v8f32_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vse64.v v12, (a1)
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x float>, ptr %x
%d = fptoui <8 x float> %a to <8 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
index a1466d46f1ba7..c29f4edb8c6a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
@@ -84,8 +84,8 @@ define <15 x double> @vfpext_v15f32_v15f64(<15 x float> %a, <15 x i1> %m, i32 ze
; CHECK-LABEL: vfpext_v15f32_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.fpext.v15f64.v15f32(<15 x float> %a, <15 x i1> %m, i32 %vl)
ret <15 x double> %v
@@ -98,24 +98,25 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v0, 2
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 16
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl)
ret <32 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
index f6c992280c6e3..cbc4c69669b51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
@@ -269,8 +269,8 @@ define <4 x i64> @vfptosi_v4i64_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
; CHECK-LABEL: vfptosi_v4i64_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -280,8 +280,8 @@ define <4 x i64> @vfptosi_v4i64_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl
; CHECK-LABEL: vfptosi_v4i64_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
index af225f4d95aa2..5dd3e0372f401 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
@@ -269,8 +269,8 @@ define <4 x i64> @vfptoui_v4i64_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
; CHECK-LABEL: vfptoui_v4i64_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -280,8 +280,8 @@ define <4 x i64> @vfptoui_v4i64_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl
; CHECK-LABEL: vfptoui_v4i64_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 29f8730021ce4..b52b082282ec4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -369,9 +369,9 @@ define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vfwcvt.f.x.v v12, v10
-; CHECK-NEXT: vse64.v v12, (a1)
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
+; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%d = sitofp <8 x i16> %a to <8 x double>
@@ -384,9 +384,9 @@ define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
-; CHECK-NEXT: vse64.v v12, (a1)
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%d = uitofp <8 x i16> %a to <8 x double>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 62e7e3b109902..5747bbb3f100c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -269,11 +269,11 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
; VLS-LABEL: insert_v8i32_v2i32_0:
; VLS: # %bb.0:
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLS-NEXT: vle32.v v8, (a1)
-; VLS-NEXT: vl2re32.v v10, (a0)
+; VLS-NEXT: vle32.v v10, (a1)
+; VLS-NEXT: vl2re32.v v8, (a0)
; VLS-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; VLS-NEXT: vmv.v.v v10, v8
-; VLS-NEXT: vs2r.v v10, (a0)
+; VLS-NEXT: vmv.v.v v8, v10
+; VLS-NEXT: vs2r.v v8, (a0)
; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%vec = load <8 x i32>, ptr %vp
@@ -298,11 +298,11 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; VLS-LABEL: insert_v8i32_v2i32_2:
; VLS: # %bb.0:
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLS-NEXT: vle32.v v8, (a1)
-; VLS-NEXT: vl2re32.v v10, (a0)
+; VLS-NEXT: vle32.v v10, (a1)
+; VLS-NEXT: vl2re32.v v8, (a0)
; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; VLS-NEXT: vslideup.vi v10, v8, 2
-; VLS-NEXT: vs2r.v v10, (a0)
+; VLS-NEXT: vslideup.vi v8, v10, 2
+; VLS-NEXT: vs2r.v v8, (a0)
; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%vec = load <8 x i32>, ptr %vp
@@ -326,11 +326,11 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
; VLS-LABEL: insert_v8i32_v2i32_6:
; VLS: # %bb.0:
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLS-NEXT: vle32.v v8, (a1)
-; VLS-NEXT: vl2re32.v v10, (a0)
+; VLS-NEXT: vle32.v v10, (a1)
+; VLS-NEXT: vl2re32.v v8, (a0)
; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; VLS-NEXT: vslideup.vi v11, v8, 2
-; VLS-NEXT: vs2r.v v10, (a0)
+; VLS-NEXT: vslideup.vi v9, v10, 2
+; VLS-NEXT: vs2r.v v8, (a0)
; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%vec = load <8 x i32>, ptr %vp
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 16bb2105f8680..a91263e85e9e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -974,86 +974,86 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16,
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.i v0, 15
-; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 512
; RV32-NEXT: li a1, 240
-; RV32-NEXT: vmerge.vim v9, v8, -1, v0
+; RV32-NEXT: vmerge.vim v13, v12, -1, v0
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-NEXT: vmv.v.i v12, 3
-; RV32-NEXT: vmv1r.v v0, v9
-; RV32-NEXT: vmerge.vim v12, v12, 0, v0
+; RV32-NEXT: vmv.v.i v8, 3
+; RV32-NEXT: vmv1r.v v0, v13
+; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: li a1, 15
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
-; RV32-NEXT: vmerge.vim v9, v8, -1, v0
+; RV32-NEXT: vmerge.vim v13, v12, -1, v0
; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: vmv1r.v v0, v9
+; RV32-NEXT: vmv1r.v v0, v13
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-NEXT: vmerge.vim v12, v12, 1, v0
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
-; RV32-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV32-NEXT: vmerge.vim v8, v12, 2, v0
+; RV32-NEXT: vmerge.vim v8, v8, 2, v0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v0, 3
-; RV64V-NEXT: vmv.v.i v8, 0
+; RV64V-NEXT: vmv.v.i v12, 0
; RV64V-NEXT: li a0, 512
-; RV64V-NEXT: vmerge.vim v9, v8, -1, v0
+; RV64V-NEXT: vmerge.vim v13, v12, -1, v0
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64V-NEXT: vmv.v.i v12, 3
-; RV64V-NEXT: vmv1r.v v0, v9
-; RV64V-NEXT: vmerge.vim v12, v12, 0, v0
+; RV64V-NEXT: vmv.v.i v8, 3
+; RV64V-NEXT: vmv1r.v v0, v13
+; RV64V-NEXT: vmerge.vim v8, v8, 0, v0
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v0, 12
-; RV64V-NEXT: vmerge.vim v9, v8, -1, v0
+; RV64V-NEXT: vmerge.vim v13, v12, -1, v0
; RV64V-NEXT: li a1, 48
-; RV64V-NEXT: vmv.v.v v0, v9
+; RV64V-NEXT: vmv.v.v v0, v13
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64V-NEXT: vmerge.vim v12, v12, 1, v0
+; RV64V-NEXT: vmerge.vim v8, v8, 1, v0
; RV64V-NEXT: vmv.s.x v0, a1
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64V-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64V-NEXT: vmv.v.v v0, v8
+; RV64V-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64V-NEXT: vmv.v.v v0, v12
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64V-NEXT: vmerge.vim v8, v12, 2, v0
+; RV64V-NEXT: vmerge.vim v8, v8, 2, v0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v0, 15
-; RV64ZVE32-NEXT: vmv.v.i v8, 0
+; RV64ZVE32-NEXT: vmv.v.i v12, 0
; RV64ZVE32-NEXT: li a0, 512
; RV64ZVE32-NEXT: li a1, 240
-; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0
+; RV64ZVE32-NEXT: vmerge.vim v13, v12, -1, v0
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64ZVE32-NEXT: vmv.v.i v12, 3
-; RV64ZVE32-NEXT: vmv1r.v v0, v9
-; RV64ZVE32-NEXT: vmerge.vim v12, v12, 0, v0
+; RV64ZVE32-NEXT: vmv.v.i v8, 3
+; RV64ZVE32-NEXT: vmv1r.v v0, v13
+; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: li a1, 15
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0
+; RV64ZVE32-NEXT: vmerge.vim v13, v12, -1, v0
; RV64ZVE32-NEXT: slli a1, a1, 8
-; RV64ZVE32-NEXT: vmv.v.v v0, v9
+; RV64ZVE32-NEXT: vmv.v.v v0, v13
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64ZVE32-NEXT: vmerge.vim v12, v12, 1, v0
+; RV64ZVE32-NEXT: vmerge.vim v8, v8, 1, v0
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64ZVE32-NEXT: vmv.v.v v0, v8
+; RV64ZVE32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64ZVE32-NEXT: vmv.v.v v0, v12
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; RV64ZVE32-NEXT: vmerge.vim v8, v12, 2, v0
+; RV64ZVE32-NEXT: vmerge.vim v8, v8, 2, v0
; RV64ZVE32-NEXT: ret
ret <512 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 
2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index c65f6e5fa7866..970f4b04e919a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -34,9 +34,9 @@ define void @sext_v8i8_v8i32(ptr %x, ptr %z) {
; CHECK-LABEL: sext_v8i8_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vsext.vf4 v8, v10
+; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i8>, ptr %x
%b = sext <8 x i8> %a to <8 x i32>
@@ -49,9 +49,9 @@ define void @sext_v32i8_v32i32(ptr %x, ptr %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsext.vf4 v16, v8
-; CHECK-NEXT: vse32.v v16, (a1)
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vsext.vf4 v8, v16
+; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = sext <32 x i8> %a to <32 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 4911c340c9154..fc59bb0589a09 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -126,10 +126,11 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
; V128-LABEL: interleave_v4i32:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; V128-NEXT: vwaddu.vv v10, v8, v9
+; V128-NEXT: vmv1r.v v10, v9
+; V128-NEXT: vmv1r.v v11, v8
+; V128-NEXT: vwaddu.vv v8, v11, v10
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v10, a0, v9
-; V128-NEXT: vmv2r.v v8, v10
+; V128-NEXT: vwmaccu.vx v8, a0, v10
; V128-NEXT: ret
;
; V512-LABEL: interleave_v4i32:
@@ -228,10 +229,11 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
; V128-LABEL: interleave_v8i16:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; V128-NEXT: vwaddu.vv v10, v9, v8
+; V128-NEXT: vmv1r.v v10, v9
+; V128-NEXT: vmv1r.v v11, v8
+; V128-NEXT: vwaddu.vv v8, v10, v11
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v10, a0, v8
-; V128-NEXT: vmv2r.v v8, v10
+; V128-NEXT: vwmaccu.vx v8, a0, v11
; V128-NEXT: ret
;
; V512-LABEL: interleave_v8i16:
@@ -250,10 +252,11 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
; V128-LABEL: interleave_v8i32:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; V128-NEXT: vwaddu.vv v12, v8, v10
+; V128-NEXT: vmv2r.v v12, v10
+; V128-NEXT: vmv2r.v v14, v8
+; V128-NEXT: vwaddu.vv v8, v14, v12
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v12, a0, v10
-; V128-NEXT: vmv4r.v v8, v12
+; V128-NEXT: vwmaccu.vx v8, a0, v12
; V128-NEXT: ret
;
; V512-LABEL: interleave_v8i32:
@@ -272,10 +275,11 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
; V128-LABEL: interleave_v16i8:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; V128-NEXT: vwaddu.vv v10, v8, v9
+; V128-NEXT: vmv1r.v v10, v9
+; V128-NEXT: vmv1r.v v11, v8
+; V128-NEXT: vwaddu.vv v8, v11, v10
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v10, a0, v9
-; V128-NEXT: vmv2r.v v8, v10
+; V128-NEXT: vwmaccu.vx v8, a0, v10
; V128-NEXT: ret
;
; V512-LABEL: interleave_v16i8:
@@ -294,10 +298,11 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
; V128-LABEL: interleave_v16i16:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; V128-NEXT: vwaddu.vv v12, v8, v10
+; V128-NEXT: vmv2r.v v12, v10
+; V128-NEXT: vmv2r.v v14, v8
+; V128-NEXT: vwaddu.vv v8, v14, v12
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v12, a0, v10
-; V128-NEXT: vmv4r.v v8, v12
+; V128-NEXT: vwmaccu.vx v8, a0, v12
; V128-NEXT: ret
;
; V512-LABEL: interleave_v16i16:
@@ -316,19 +321,21 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
; V128-LABEL: interleave_v16i32:
; V128: # %bb.0:
; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; V128-NEXT: vwaddu.vv v16, v8, v12
+; V128-NEXT: vmv4r.v v16, v12
+; V128-NEXT: vmv4r.v v20, v8
+; V128-NEXT: vwaddu.vv v8, v20, v16
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v16, a0, v12
-; V128-NEXT: vmv8r.v v8, v16
+; V128-NEXT: vwmaccu.vx v8, a0, v16
; V128-NEXT: ret
;
; V512-LABEL: interleave_v16i32:
; V512: # %bb.0:
; V512-NEXT: vsetivli zero, 16, e32, m1, ta, ma
-; V512-NEXT: vwaddu.vv v10, v8, v9
+; V512-NEXT: vmv1r.v v10, v9
+; V512-NEXT: vmv1r.v v11, v8
+; V512-NEXT: vwaddu.vv v8, v11, v10
; V512-NEXT: li a0, -1
-; V512-NEXT: vwmaccu.vx v10, a0, v9
-; V512-NEXT: vmv2r.v v8, v10
+; V512-NEXT: vwmaccu.vx v8, a0, v10
; V512-NEXT: ret
%a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i32> %a
@@ -337,12 +344,14 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
; V128-LABEL: interleave_v32i8:
; V128: # %bb.0:
+; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V128-NEXT: vmv2r.v v12, v10
+; V128-NEXT: vmv2r.v v14, v8
; V128-NEXT: li a0, 32
; V128-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; V128-NEXT: vwaddu.vv v12, v8, v10
+; V128-NEXT: vwaddu.vv v8, v14, v12
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v12, a0, v10
-; V128-NEXT: vmv4r.v v8, v12
+; V128-NEXT: vwmaccu.vx v8, a0, v12
; V128-NEXT: ret
;
; V512-LABEL: interleave_v32i8:
@@ -361,22 +370,26 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
; V128-LABEL: interleave_v32i16:
; V128: # %bb.0:
+; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V128-NEXT: vmv4r.v v16, v12
+; V128-NEXT: vmv4r.v v20, v8
; V128-NEXT: li a0, 32
; V128-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; V128-NEXT: vwaddu.vv v16, v8, v12
+; V128-NEXT: vwaddu.vv v8, v20, v16
; V128-NEXT: li a0, -1
-; V128-NEXT: vwmaccu.vx v16, a0, v12
-; V128-NEXT: vmv8r.v v8, v16
+; V128-NEXT: vwmaccu.vx v8, a0, v16
; V128-NEXT: ret
;
; V512-LABEL: interleave_v32i16:
; V512: # %bb.0:
+; V512-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V512-NEXT: vmv1r.v v10, v9
+; V512-NEXT: vmv1r.v v11, v8
; V512-NEXT: li a0, 32
; V512-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; V512-NEXT: vwaddu.vv v10, v8, v9
+; V512-NEXT: vwaddu.vv v8, v11, v10
; V512-NEXT: li a0, -1
-; V512-NEXT: vwmaccu.vx v10, a0, v9
-; V512-NEXT: vmv2r.v v8, v10
+; V512-NEXT: vwmaccu.vx v8, a0, v10
; V512-NEXT: ret
%a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i16> %a
@@ -424,12 +437,14 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
;
; V512-LABEL: interleave_v32i32:
; V512: # %bb.0:
+; V512-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V512-NEXT: vmv2r.v v12, v10
+; V512-NEXT: vmv2r.v v14, v8
; V512-NEXT: li a0, 32
; V512-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; V512-NEXT: vwaddu.vv v12, v8, v10
+; V512-NEXT: vwaddu.vv v8, v14, v12
; V512-NEXT: li a0, -1
-; V512-NEXT: vwmaccu.vx v12, a0, v10
-; V512-NEXT: vmv4r.v v8, v12
+; V512-NEXT: vwmaccu.vx v8, a0, v12
; V512-NEXT: ret
%a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i32> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index cd73dbadb2d03..caf9c976b8c76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -1063,15 +1063,15 @@ define <16 x i32> @shuffle_disjoint_lanes(<16 x i32> %v, <16 x i32> %w) {
; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0)
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v18, (a0)
; CHECK-NEXT: lui a0, 11
; CHECK-NEXT: addi a0, a0, -1366
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v18, v16
+; CHECK-NEXT: vsext.vf2 v16, v18
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v8, v12, v18
+; CHECK-NEXT: vrgatherei16.vv v8, v12, v16
; CHECK-NEXT: ret
%out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
ret <16 x i32> %out
@@ -1172,9 +1172,9 @@ define <4 x i128> @shuffle_i128(<4 x i128> %a) {
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle32.v v8, (a2)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vle16.v v12, (a1)
-; RV32-NEXT: vrgatherei16.vv v16, v8, v12
-; RV32-NEXT: vse64.v v16, (a0)
+; RV32-NEXT: vle16.v v16, (a1)
+; RV32-NEXT: vrgatherei16.vv v12, v8, v16
+; RV32-NEXT: vse64.v v12, (a0)
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: .cfi_def_cfa sp, 128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
@@ -1213,9 +1213,9 @@ define <4 x i128> @shuffle_i128(<4 x i128> %a) {
; RV64-NEXT: addi a1, a1, %lo(.LCPI78_0)
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a2)
-; RV64-NEXT: vle16.v v12, (a1)
-; RV64-NEXT: vrgatherei16.vv v16, v8, v12
-; RV64-NEXT: vse64.v v16, (a0)
+; RV64-NEXT: vle16.v v16, (a1)
+; RV64-NEXT: vrgatherei16.vv v12, v8, v16
+; RV64-NEXT: vse64.v v12, (a0)
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: .cfi_def_cfa sp, 128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
@@ -1236,9 +1236,9 @@ define void @shuffle_i128_ldst(ptr %p) {
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI79_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI79_0)
-; CHECK-NEXT: vle16.v v12, (a1)
-; CHECK-NEXT: vrgatherei16.vv v16, v8, v12
-; CHECK-NEXT: vse64.v v16, (a0)
+; CHECK-NEXT: vle16.v v16, (a1)
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT: vse64.v v12, (a0)
; CHECK-NEXT: ret
%a = load <4 x i128>, ptr %p
%res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
@@ -1252,12 +1252,12 @@ define void @shuffle_i256_ldst(ptr %p) {
; CHECK-NEXT: lui a1, %hi(.LCPI80_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI80_0)
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a1)
-; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vle8.v v16, (a1)
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsext.vf2 v24, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v10
-; CHECK-NEXT: vse64.v v24, (a0)
+; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
+; CHECK-NEXT: vse64.v v16, (a0)
; CHECK-NEXT: ret
%a = load <4 x i256>, ptr %p
%res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
@@ -1318,10 +1318,10 @@ define void @shuffle_i256_splat(ptr %p) nounwind {
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vsext.vf2 v18, v16
+; RV32-NEXT: vsext.vf2 v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vse64.v v24, (a0)
+; RV32-NEXT: vrgatherei16.vv v16, v8, v24
+; RV32-NEXT: vse64.v v16, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: shuffle_i256_splat:
@@ -1333,10 +1333,10 @@ define void @shuffle_i256_splat(ptr %p) nounwind {
; RV64-NEXT: addi a1, a1, 1
; RV64-NEXT: slli a1, a1, 16
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v16, a1
+; RV64-NEXT: vmv.v.x v24, a1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v8, v16
-; RV64-NEXT: vse64.v v24, (a0)
+; RV64-NEXT: vrgatherei16.vv v16, v8, v24
+; RV64-NEXT: vse64.v v16, (a0)
; RV64-NEXT: ret
%a = load <4 x i256>, ptr %p
%res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 0436a27409f81..3bb5e179e0d06 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -3225,26 +3225,26 @@ define void @mulhu_v16i16(ptr %x) {
; RV32-NEXT: addi a1, a1, 289
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
+; RV32-NEXT: vmerge.vim v16, v9, 1, v0
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: lui a1, %hi(.LCPI182_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0)
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV32-NEXT: vmv.v.i v14, 3
-; RV32-NEXT: vmerge.vim v14, v14, 2, v0
-; RV32-NEXT: vle16.v v16, (a1)
+; RV32-NEXT: vmv.v.i v9, 3
+; RV32-NEXT: vmerge.vim v9, v9, 2, v0
+; RV32-NEXT: vle16.v v14, (a1)
; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v14, 1, v0
+; RV32-NEXT: vmerge.vim v17, v9, 1, v0
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vsext.vf2 v14, v9
-; RV32-NEXT: vsrl.vv v14, v10, v14
-; RV32-NEXT: vmulhu.vv v14, v14, v16
-; RV32-NEXT: vsub.vv v10, v10, v14
+; RV32-NEXT: vsext.vf2 v8, v16
+; RV32-NEXT: vsrl.vv v8, v10, v8
+; RV32-NEXT: vmulhu.vv v8, v8, v14
+; RV32-NEXT: vsub.vv v10, v10, v8
; RV32-NEXT: vmulhu.vv v10, v10, v12
-; RV32-NEXT: vadd.vv v10, v10, v14
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsrl.vv v8, v10, v12
+; RV32-NEXT: vadd.vv v8, v10, v8
+; RV32-NEXT: vsext.vf2 v10, v17
+; RV32-NEXT: vsrl.vv v8, v8, v10
; RV32-NEXT: vse16.v v8, (a0)
; RV32-NEXT: ret
;
@@ -3263,12 +3263,12 @@ define void @mulhu_v16i16(ptr %x) {
; RV64-NEXT: li a1, 1
; RV64-NEXT: slli a1, a1, 48
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v14, a1
+; RV64-NEXT: vmv.v.x v16, a1
; RV64-NEXT: lui a1, %hi(.LCPI182_1)
; RV64-NEXT: ld a1, %lo(.LCPI182_1)(a1)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v14
-; RV64-NEXT: vsrl.vv v14, v8, v16
+; RV64-NEXT: vsext.vf2 v14, v16
+; RV64-NEXT: vsrl.vv v14, v8, v14
; RV64-NEXT: vmulhu.vv v12, v14, v12
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v14, a1
@@ -3305,11 +3305,11 @@ define void @mulhu_v8i32(ptr %x) {
; CHECK-NEXT: vsub.vv v8, v8, v12
; CHECK-NEXT: vmulhu.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v14, a1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v12
-; CHECK-NEXT: vsext.vf4 v12, v10
-; CHECK-NEXT: vsrl.vv v8, v8, v12
+; CHECK-NEXT: vsext.vf4 v10, v14
+; CHECK-NEXT: vsrl.vv v8, v8, v10
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i32>, ptr %x
@@ -3482,26 +3482,26 @@ define void @mulhs_v4i64(ptr %x) {
; RV32-NEXT: vmv.s.x v0, a2
; RV32-NEXT: lui a2, 1048560
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a2
+; RV32-NEXT: vmv.v.x v14, a2
; RV32-NEXT: addi a2, a1, 1365
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a2
+; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: li a2, 63
; RV32-NEXT: addi a1, a1, 1366
-; RV32-NEXT: vmerge.vxm v12, v12, a1, v0
+; RV32-NEXT: vmerge.vxm v10, v10, a1, v0
; RV32-NEXT: lui a1, 16
-; RV32-NEXT: vsext.vf4 v14, v10
+; RV32-NEXT: vsext.vf4 v12, v14
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vmulh.vv v10, v8, v12
-; RV32-NEXT: vmadd.vv v14, v8, v10
+; RV32-NEXT: vmulh.vv v10, v8, v10
+; RV32-NEXT: vmadd.vv v12, v8, v10
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vsext.vf4 v10, v8
+; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vx v8, v14, a2
-; RV32-NEXT: vsra.vv v10, v14, v10
-; RV32-NEXT: vadd.vv v8, v10, v8
+; RV32-NEXT: vsrl.vx v10, v12, a2
+; RV32-NEXT: vsra.vv v8, v12, v8
+; RV32-NEXT: vadd.vv v8, v8, v10
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
@@ -3513,7 +3513,7 @@ define void @mulhs_v4i64(ptr %x) {
; RV64-NEXT: lui a2, 1044496
; RV64-NEXT: addiw a1, a1, 1365
; RV64-NEXT: addi a2, a2, -256
-; RV64-NEXT: vmv.s.x v10, a2
+; RV64-NEXT: vmv.s.x v14, a2
; RV64-NEXT: slli a2, a1, 32
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: lui a2, %hi(.LCPI188_0)
@@ -3521,19 +3521,19 @@ define void @mulhs_v4i64(ptr %x) {
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.i v0, 5
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a1
+; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: li a1, 63
-; RV64-NEXT: vmerge.vxm v12, v12, a2, v0
+; RV64-NEXT: vmerge.vxm v10, v10, a2, v0
; RV64-NEXT: lui a2, 4096
; RV64-NEXT: addi a2, a2, 256
-; RV64-NEXT: vsext.vf8 v14, v10
-; RV64-NEXT: vmulh.vv v10, v8, v12
-; RV64-NEXT: vmadd.vv v14, v8, v10
-; RV64-NEXT: vmv.s.x v8, a2
-; RV64-NEXT: vsext.vf8 v10, v8
-; RV64-NEXT: vsrl.vx v8, v14, a1
-; RV64-NEXT: vsra.vv v10, v14, v10
-; RV64-NEXT: vadd.vv v8, v10, v8
+; RV64-NEXT: vsext.vf8 v12, v14
+; RV64-NEXT: vmulh.vv v10, v8, v10
+; RV64-NEXT: vmadd.vv v12, v8, v10
+; RV64-NEXT: vmv.s.x v10, a2
+; RV64-NEXT: vsext.vf8 v8, v10
+; RV64-NEXT: vsrl.vx v10, v12, a1
+; RV64-NEXT: vsra.vv v8, v12, v8
+; RV64-NEXT: vadd.vv v8, v8, v10
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index e93b264c5f4f0..954a5889f31ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -183,10 +183,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 100
+; RV32-NEXT: li a3, 96
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 100 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 96 * vlenb
; RV32-NEXT: addi a4, a1, 128
; RV32-NEXT: addi a5, a1, 256
; RV32-NEXT: li a2, 32
@@ -197,7 +197,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vmv.s.x v3, a3
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 84
+; RV32-NEXT: li a6, 80
; RV32-NEXT: mul a1, a1, a6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -205,7 +205,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vslideup.vi v8, v16, 4
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 60
+; RV32-NEXT: li a6, 56
; RV32-NEXT: mul a1, a1, a6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -213,14 +213,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v16, v16, 16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 92
+; RV32-NEXT: li a6, 88
; RV32-NEXT: mul a1, a1, a6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 92
+; RV32-NEXT: li a6, 88
; RV32-NEXT: mul a1, a1, a6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -228,7 +228,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 72
+; RV32-NEXT: li a6, 68
; RV32-NEXT: mul a1, a1, a6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -238,7 +238,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a5, a5, 3
; RV32-NEXT: vmv.s.x v0, a5
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 60
+; RV32-NEXT: li a4, 56
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -249,7 +249,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vmerge.vvm v24, v8, v24, v0
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 92
+; RV32-NEXT: li a4, 88
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -257,8 +257,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vslideup.vi v4, v16, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 68
-; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
@@ -274,20 +273,20 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v24, v6
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 52
+; RV32-NEXT: li a4, 48
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv8r.v v16, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 76
+; RV32-NEXT: li a4, 72
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 84
+; RV32-NEXT: li a4, 80
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -297,7 +296,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v8, v4
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 44
+; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -313,13 +312,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vmv.s.x v0, a5
; RV32-NEXT: addi a6, a6, 3
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li t1, 84
+; RV32-NEXT: li t1, 80
; RV32-NEXT: mul a5, a5, t1
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li t1, 76
+; RV32-NEXT: li t1, 72
; RV32-NEXT: mul a5, a5, t1
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
@@ -334,35 +333,29 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vmv.s.x v0, a6
; RV32-NEXT: addi a5, a7, 192
; RV32-NEXT: csrr a6, vlenb
-; RV32-NEXT: li a7, 92
+; RV32-NEXT: li a7, 88
; RV32-NEXT: mul a6, a6, a7
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 16
; RV32-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload
; RV32-NEXT: csrr a6, vlenb
-; RV32-NEXT: li a7, 60
+; RV32-NEXT: li a7, 56
; RV32-NEXT: mul a6, a6, a7
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 16
; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
-; RV32-NEXT: csrr a6, vlenb
-; RV32-NEXT: li a7, 28
-; RV32-NEXT: mul a6, a6, a7
-; RV32-NEXT: add a6, sp, a6
-; RV32-NEXT: addi a6, a6, 16
-; RV32-NEXT: vs4r.v v8, (a6) # Unknown-size Folded Spill
+; RV32-NEXT: vmerge.vvm v4, v8, v24, v0
; RV32-NEXT: vmv.s.x v0, a5
; RV32-NEXT: addi a3, a3, 12
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 84
+; RV32-NEXT: li a6, 80
; RV32-NEXT: mul a5, a5, a6
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 76
+; RV32-NEXT: li a6, 72
; RV32-NEXT: mul a5, a5, a6
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
@@ -370,15 +363,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 36
-; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: slli a5, a5, 5
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vmv.s.x v0, a3
; RV32-NEXT: addi a3, t0, 768
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 92
+; RV32-NEXT: li a6, 88
; RV32-NEXT: mul a5, a5, a6
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
@@ -386,20 +378,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 5
+; RV32-NEXT: li a6, 28
+; RV32-NEXT: mul a5, a5, a6
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs4r.v v8, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vmv.s.x v0, a3
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 84
+; RV32-NEXT: li a6, 80
; RV32-NEXT: mul a5, a5, a6
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 76
+; RV32-NEXT: li a6, 72
; RV32-NEXT: mul a5, a5, a6
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
@@ -413,22 +406,22 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vmv.s.x v0, a4
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle16.v v2, (a1)
+; RV32-NEXT: vle16.v v28, (a1)
; RV32-NEXT: addi a1, a3, -1024
; RV32-NEXT: vmv4r.v v8, v24
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 92
+; RV32-NEXT: li a4, 88
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vmerge.vvm v12, v16, v24, v0
+; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
@@ -436,17 +429,17 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v16, v2
+; RV32-NEXT: vrgatherei16.vv v8, v16, v28
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 84
+; RV32-NEXT: li a3, 80
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 76
+; RV32-NEXT: li a3, 72
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -454,7 +447,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v16, v8, v16, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 84
+; RV32-NEXT: li a2, 80
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -466,7 +459,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v12, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 92
+; RV32-NEXT: li a2, 88
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -474,19 +467,19 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v8, v16, v24, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 92
+; RV32-NEXT: li a2, 88
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 72
+; RV32-NEXT: li a2, 68
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 52
+; RV32-NEXT: li a2, 48
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -494,77 +487,69 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v16, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 72
+; RV32-NEXT: li a2, 68
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 68
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 44
+; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv.v.v v16, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 68
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 28
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v4, v8, v12
+; RV32-NEXT: vrgatherei16.vv v20, v4, v12
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v4, v8
+; RV32-NEXT: vmv.v.v v20, v8
; RV32-NEXT: lui a1, %hi(.LCPI8_4)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4)
; RV32-NEXT: lui a2, %hi(.LCPI8_5)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v0, (a1)
+; RV32-NEXT: vle16.v v16, (a1)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v16, (a2)
+; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: lui a1, %hi(.LCPI8_6)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle16.v v2, (a1)
+; RV32-NEXT: vle16.v v18, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 36
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v0
+; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v0, v16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 28
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v24, v12, v16
+; RV32-NEXT: vrgatherei16.vv v0, v12, v24
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v24, v8
+; RV32-NEXT: vmv.v.v v0, v8
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v16, v2
+; RV32-NEXT: vrgatherei16.vv v8, v24, v18
; RV32-NEXT: lui a1, %hi(.LCPI8_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7)
; RV32-NEXT: lui a2, %hi(.LCPI8_8)
@@ -574,62 +559,85 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, %hi(.LCPI8_9)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v28, (a2)
+; RV32-NEXT: vle16.v v14, (a2)
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 72
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs2r.v v14, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vle16.v v30, (a1)
+; RV32-NEXT: vle16.v v13, (a1)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v13, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v0, v16, v12
+; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v16, v4, v12
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v0, v8
+; RV32-NEXT: vmv.v.v v16, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 84
+; RV32-NEXT: li a2, 80
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 72
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl2r.v v6, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v16, v28
+; RV32-NEXT: vrgatherei16.vv v8, v24, v6
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 92
+; RV32-NEXT: li a2, 88
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl1r.v v28, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v12, v16, v30
+; RV32-NEXT: vrgatherei16.vv v12, v24, v28
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: addi a1, a0, 320
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v12, (a1)
; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: vse32.v v0, (a1)
+; RV32-NEXT: vse32.v v16, (a1)
; RV32-NEXT: addi a1, a0, 192
-; RV32-NEXT: vse32.v v24, (a1)
+; RV32-NEXT: vse32.v v0, (a1)
; RV32-NEXT: addi a1, a0, 128
-; RV32-NEXT: vse32.v v4, (a1)
+; RV32-NEXT: vse32.v v20, (a1)
; RV32-NEXT: addi a1, a0, 64
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 68
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a2, a2, 6
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 72
+; RV32-NEXT: li a2, 68
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 100
+; RV32-NEXT: li a1, 96
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
@@ -642,14 +650,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 95
+; RV64-NEXT: li a3, 93
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdf, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 95 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdd, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 93 * vlenb
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -662,7 +671,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi a3, a3, %lo(.LCPI8_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
-; RV64-NEXT: li a5, 71
+; RV64-NEXT: li a5, 61
; RV64-NEXT: mul a4, a4, a5
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 16
@@ -673,21 +682,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 8
; RV64-NEXT: csrr a5, vlenb
-; RV64-NEXT: slli a6, a5, 6
-; RV64-NEXT: sub a5, a6, a5
+; RV64-NEXT: li a6, 77
+; RV64-NEXT: mul a5, a5, a6
; RV64-NEXT: add a5, sp, a5
; RV64-NEXT: addi a5, a5, 16
; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV64-NEXT: csrr a5, vlenb
-; RV64-NEXT: slli a6, a5, 6
-; RV64-NEXT: sub a5, a6, a5
+; RV64-NEXT: li a6, 77
+; RV64-NEXT: mul a5, a5, a6
; RV64-NEXT: add a5, sp, a5
; RV64-NEXT: addi a5, a5, 16
; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v24, v16, 5, v0.t
; RV64-NEXT: csrr a5, vlenb
-; RV64-NEXT: li a6, 83
+; RV64-NEXT: li a6, 73
; RV64-NEXT: mul a5, a5, a6
; RV64-NEXT: add a5, sp, a5
; RV64-NEXT: addi a5, a5, 16
@@ -695,7 +704,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v24, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a5, 87
+; RV64-NEXT: li a5, 85
; RV64-NEXT: mul a2, a2, a5
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
@@ -703,20 +712,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vle16.v v12, (a3)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 87
+; RV64-NEXT: li a3, 85
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
; RV64-NEXT: vrgatherei16.vv v0, v24, v12
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 47
+; RV64-NEXT: li a3, 37
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
@@ -724,21 +734,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v8, 1
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 71
+; RV64-NEXT: li a3, 61
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl1r.v v7, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 6
-; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: li a3, 77
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vslideup.vi v12, v24, 4, v0.t
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 79
+; RV64-NEXT: li a3, 69
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
@@ -751,7 +761,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi a2, a3, 260
; RV64-NEXT: vmv8r.v v24, v16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a5, 87
+; RV64-NEXT: li a5, 85
; RV64-NEXT: mul a3, a3, a5
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
@@ -759,8 +769,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmerge.vvm v16, v16, v24, v0
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a5, 18
-; RV64-NEXT: mul a3, a3, a5
+; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
@@ -768,21 +777,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vmv.s.x v2, a4
; RV64-NEXT: vmv4r.v v12, v8
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 87
+; RV64-NEXT: li a3, 85
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vmerge.vvm v16, v16, v24, v0
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 39
+; RV64-NEXT: li a3, 29
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v2
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 55
+; RV64-NEXT: li a3, 45
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
@@ -791,15 +800,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vslideup.vi v12, v8, 5, v0.t
; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 6
-; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: li a3, 77
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t
+; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 75
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill
@@ -807,133 +816,139 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vmv1r.v v0, v2
; RV64-NEXT: vslideup.vi v12, v8, 4, v0.t
; RV64-NEXT: vmv1r.v v0, v7
-; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t
+; RV64-NEXT: vrgather.vi v12, v24, 5, v0.t
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 35
+; RV64-NEXT: li a3, 25
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmv4r.v v8, v16
; RV64-NEXT: lui a2, 8
; RV64-NEXT: addi a2, a2, 520
; RV64-NEXT: vmv.s.x v0, a2
-; RV64-NEXT: vslideup.vi v12, v16, 6
+; RV64-NEXT: vslideup.vi v12, v24, 6
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 87
+; RV64-NEXT: li a3, 85
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmerge.vvm v16, v16, v24, v0
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 27
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a3, a2, 4
+; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 77
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vslideup.vi v12, v8, 1, v0.t
+; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
; RV64-NEXT: lui a2, %hi(.LCPI8_1)
; RV64-NEXT: addi a2, a2, %lo(.LCPI8_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vle16.v v16, (a2)
-; RV64-NEXT: addi a2, sp, 16
-; RV64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vle16.v v6, (a2)
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 26
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a2, a2, 4
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 55
+; RV64-NEXT: li a3, 45
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vrgather.vi v20, v16, 2
-; RV64-NEXT: vmerge.vvm v12, v20, v12, v0
+; RV64-NEXT: vrgather.vi v28, v16, 2
+; RV64-NEXT: vmerge.vvm v16, v28, v12, v0
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 71
+; RV64-NEXT: li a3, 61
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 18
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; RV64-NEXT: addi a2, sp, 16
-; RV64-NEXT: vl2r.v v12, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v0, v16, v12
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 10
-; RV64-NEXT: mul a2, a2, a3
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vrgatherei16.vv v24, v16, v6
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV64-NEXT: lui a2, %hi(.LCPI8_2)
; RV64-NEXT: addi a2, a2, %lo(.LCPI8_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a4, 87
+; RV64-NEXT: li a4, 85
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
-; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v16, v16, v24, v0
+; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a4, 18
+; RV64-NEXT: li a4, 53
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
-; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT: vmerge.vvm v8, v24, v16, v0
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: vle16.v v12, (a2)
+; RV64-NEXT: vle16.v v6, (a2)
; RV64-NEXT: li a1, 64
+; RV64-NEXT: vmerge.vvm v8, v24, v16, v0
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 87
-; RV64-NEXT: mul a2, a2, a3
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 87
+; RV64-NEXT: li a3, 85
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 39
+; RV64-NEXT: li a2, 29
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v24, v16, v12
-; RV64-NEXT: vmv4r.v v12, v8
+; RV64-NEXT: vrgatherei16.vv v24, v16, v6
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 77
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv4r.v v28, v8
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vslideup.vi v12, v8, 5, v0.t
+; RV64-NEXT: vslideup.vi v28, v8, 5, v0.t
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 83
+; RV64-NEXT: li a2, 73
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 47
+; RV64-NEXT: li a2, 37
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -941,26 +956,22 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v8, v0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 83
+; RV64-NEXT: li a2, 73
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 10
-; RV64-NEXT: mul a1, a1, a2
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 79
+; RV64-NEXT: li a2, 69
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmv.v.v v8, v0
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 79
+; RV64-NEXT: li a2, 69
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -968,128 +979,138 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: lui a1, %hi(.LCPI8_3)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vle16.v v8, (a1)
+; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: lui a1, %hi(.LCPI8_4)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4)
-; RV64-NEXT: vle16.v v10, (a1)
+; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 75
+; RV64-NEXT: li a2, 77
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v16, v24
+; RV64-NEXT: vmv.v.v v8, v24
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 75
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 27
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v0, v8
+; RV64-NEXT: vrgatherei16.vv v0, v8, v20
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 35
+; RV64-NEXT: li a2, 25
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v4, v24
+; RV64-NEXT: vmv.v.v v12, v0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 18
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 77
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v16, v10
+; RV64-NEXT: vrgatherei16.vv v0, v16, v8
; RV64-NEXT: lui a1, %hi(.LCPI8_5)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5)
-; RV64-NEXT: vle16.v v8, (a1)
+; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 71
+; RV64-NEXT: li a2, 61
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v28, v24
+; RV64-NEXT: vmv.v.v v8, v0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 71
+; RV64-NEXT: li a2, 61
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 55
+; RV64-NEXT: li a2, 45
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vrgather.vi v28, v24, 3
+; RV64-NEXT: vrgather.vi v8, v0, 3
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 26
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v12, v28, v12, v0
+; RV64-NEXT: vmerge.vvm v8, v8, v28, v0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 87
+; RV64-NEXT: li a2, 85
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v16, v8
+; RV64-NEXT: vrgatherei16.vv v24, v0, v20
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v12, v24
+; RV64-NEXT: vmv.v.v v8, v24
; RV64-NEXT: addi a1, a0, 256
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 71
+; RV64-NEXT: li a3, 61
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v20, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a1)
+; RV64-NEXT: vse64.v v20, (a1)
; RV64-NEXT: addi a1, a0, 320
-; RV64-NEXT: vse64.v v12, (a1)
+; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 192
-; RV64-NEXT: vse64.v v4, (a1)
+; RV64-NEXT: vse64.v v12, (a1)
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 75
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 64
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 79
+; RV64-NEXT: li a3, 69
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 83
+; RV64-NEXT: li a2, 73
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 95
+; RV64-NEXT: li a1, 93
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
@@ -1290,17 +1311,17 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback_full:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v8, 4
-; CHECK-NEXT: vmv1r.v v13, v8
-; CHECK-NEXT: vmv1r.v v14, v12
+; CHECK-NEXT: vslidedown.vi v8, v12, 4
+; CHECK-NEXT: vmv1r.v v17, v12
+; CHECK-NEXT: vmv1r.v v18, v8
; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v8, 8
-; CHECK-NEXT: vmv1r.v v15, v16
-; CHECK-NEXT: vslidedown.vi v16, v8, 12
+; CHECK-NEXT: vslidedown.vi v8, v12, 8
+; CHECK-NEXT: vmv1r.v v19, v8
+; CHECK-NEXT: vslidedown.vi v20, v12, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vsseg4e32.v v13, (a0)
+; CHECK-NEXT: vsseg4e32.v v17, (a0)
; CHECK-NEXT: ret
%interleaved.vec = load <16 x i32>, ptr %ptr
%v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll
index 03ce5bea55665..8b6270e86af36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll
@@ -7,8 +7,8 @@ define <4 x ptr> @inttoptr_v4p0_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %e
; CHECK-LABEL: inttoptr_v4p0_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x ptr> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
index 8282b8884aed6..466fe744a1376 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
@@ -46,15 +46,15 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %e
; RV32-LABEL: llrint_v3i64_v3f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV32-NEXT: vmv2r.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v3i64_v3f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV64-NEXT: vmv2r.v v8, v10
+; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV64-NEXT: ret
%a = call <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
ret <3 x i64> %a
@@ -65,15 +65,15 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %e
; RV32-LABEL: llrint_v4i64_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV32-NEXT: vmv2r.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v4i64_v4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV64-NEXT: vmv2r.v v8, v10
+; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV64-NEXT: ret
%a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
ret <4 x i64> %a
@@ -84,15 +84,15 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %e
; RV32-LABEL: llrint_v8i64_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v12, v8, v0.t
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v8i64_v8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v12, v8, v0.t
-; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: vmv2r.v v12, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v12, v0.t
; RV64-NEXT: ret
%a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
ret <8 x i64> %a
@@ -103,15 +103,15 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroe
; RV32-LABEL: llrint_v16i64_v16f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v16, v8, v0.t
-; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: vmv4r.v v16, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v16i64_v16f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v16, v8, v0.t
-; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: vmv4r.v v16, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v16, v0.t
; RV64-NEXT: ret
%a = call <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
ret <16 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index f27614c93985f..9bd116b2303c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -189,18 +189,18 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vslidedown.vi v11, v8, 3
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: vfmv.f.s fa5, v9
; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vfmv.f.s fa5, v10
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: vfmv.f.s fa5, v11
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v10, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: vslide1down.vx v8, v8, a0
@@ -298,18 +298,18 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vslidedown.vi v11, v8, 3
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: vfmv.f.s fa5, v9
; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vfmv.f.s fa5, v10
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: vfmv.f.s fa5, v11
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v10, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: vslide1down.vx v8, v8, a0
@@ -618,42 +618,42 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-NEXT: vslidedown.vi v11, v8, 2
+; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: fcvt.l.s a0, fa5
; RV64-NEXT: sd a0, 248(sp)
; RV64-NEXT: flw fa5, 120(sp)
-; RV64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-NEXT: vslidedown.vi v13, v8, 1
; RV64-NEXT: fcvt.l.s a0, fa4
; RV64-NEXT: vfmv.f.s fa4, v10
; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: sd a1, 240(sp)
; RV64-NEXT: flw fa5, 116(sp)
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v14, v8, 7
+; RV64-NEXT: vslidedown.vi v10, v8, 7
; RV64-NEXT: fcvt.l.s a1, fa4
-; RV64-NEXT: vfmv.f.s fa4, v11
+; RV64-NEXT: vfmv.f.s fa4, v12
; RV64-NEXT: fcvt.l.s a2, fa5
; RV64-NEXT: sd a2, 232(sp)
; RV64-NEXT: flw fa5, 112(sp)
; RV64-NEXT: fcvt.l.s a2, fa4
-; RV64-NEXT: vfmv.f.s fa4, v12
-; RV64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-NEXT: vfmv.f.s fa4, v13
+; RV64-NEXT: vslidedown.vi v12, v8, 6
; RV64-NEXT: fcvt.l.s a3, fa5
; RV64-NEXT: sd a3, 224(sp)
; RV64-NEXT: flw fa5, 108(sp)
; RV64-NEXT: fcvt.l.s a3, fa4
-; RV64-NEXT: vfmv.f.s fa4, v14
-; RV64-NEXT: vslidedown.vi v12, v8, 5
+; RV64-NEXT: vfmv.f.s fa4, v10
+; RV64-NEXT: vslidedown.vi v10, v8, 5
; RV64-NEXT: fcvt.l.s a4, fa5
; RV64-NEXT: sd a4, 216(sp)
; RV64-NEXT: flw fa5, 104(sp)
; RV64-NEXT: fcvt.l.s a4, fa4
-; RV64-NEXT: vfmv.f.s fa4, v10
+; RV64-NEXT: vfmv.f.s fa4, v12
; RV64-NEXT: fcvt.l.s a5, fa4
; RV64-NEXT: fcvt.l.s a6, fa5
; RV64-NEXT: sd a6, 208(sp)
; RV64-NEXT: flw fa5, 100(sp)
-; RV64-NEXT: vfmv.f.s fa4, v12
+; RV64-NEXT: vfmv.f.s fa4, v10
; RV64-NEXT: fcvt.l.s a6, fa4
; RV64-NEXT: vslidedown.vi v8, v8, 4
; RV64-NEXT: fcvt.l.s a7, fa5
@@ -866,15 +866,15 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; RV64-LABEL: llrint_v4i64_v4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vslidedown.vi v12, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 2
+; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.d a1, fa5
; RV64-NEXT: vfmv.f.s fa5, v12
+; RV64-NEXT: fcvt.l.d a1, fa5
+; RV64-NEXT: vfmv.f.s fa5, v10
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.v.x v10, a0
; RV64-NEXT: fcvt.l.d a0, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
index 1c920e42f7d41..5b5163c17a5c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
@@ -70,8 +70,8 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
; RV64-i64-LABEL: lrint_v3f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: vmv1r.v v10, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV64-i64-NEXT: ret
%a = call <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
ret <3 x iXLen> %a
@@ -94,8 +94,8 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
; RV64-i64-LABEL: lrint_v4f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: vmv1r.v v10, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV64-i64-NEXT: ret
%a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
ret <4 x iXLen> %a
@@ -118,8 +118,8 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
; RV64-i64-LABEL: lrint_v8f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8, v0.t
-; RV64-i64-NEXT: vmv4r.v v8, v12
+; RV64-i64-NEXT: vmv2r.v v12, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v12, v0.t
; RV64-i64-NEXT: ret
%a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
ret <8 x iXLen> %a
@@ -142,8 +142,8 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %ev
; RV64-i64-LABEL: lrint_v16f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8, v0.t
-; RV64-i64-NEXT: vmv8r.v v8, v16
+; RV64-i64-NEXT: vmv4r.v v16, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v16, v0.t
; RV64-i64-NEXT: ret
%a = call <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
ret <16 x iXLen> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index 2f58e3dd2769f..27abd624f7765 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -125,18 +125,18 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v8
; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i64-NEXT: vslidedown.vi v11, v8, 3
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: vfmv.f.s fa5, v9
; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vfmv.f.s fa5, v10
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vmv.v.x v10, a0
+; RV64-i64-NEXT: vmv.v.x v8, a0
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: vfmv.f.s fa5, v11
; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v8, v10, a1
+; RV64-i64-NEXT: vslide1down.vx v8, v8, a1
; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
@@ -193,18 +193,18 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v8
; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i64-NEXT: vslidedown.vi v11, v8, 3
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: vfmv.f.s fa5, v9
; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vfmv.f.s fa5, v10
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vmv.v.x v10, a0
+; RV64-i64-NEXT: vmv.v.x v8, a0
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: vfmv.f.s fa5, v11
; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v8, v10, a1
+; RV64-i64-NEXT: vslide1down.vx v8, v8, a1
; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
@@ -537,42 +537,42 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
; RV64-i64-NEXT: vfmv.f.s fa4, v8
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
-; RV64-i64-NEXT: vslidedown.vi v11, v8, 2
+; RV64-i64-NEXT: vslidedown.vi v12, v8, 2
; RV64-i64-NEXT: fcvt.l.s a0, fa5
; RV64-i64-NEXT: sd a0, 248(sp)
; RV64-i64-NEXT: flw fa5, 120(sp)
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 1
+; RV64-i64-NEXT: vslidedown.vi v13, v8, 1
; RV64-i64-NEXT: fcvt.l.s a0, fa4
; RV64-i64-NEXT: vfmv.f.s fa4, v10
; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: sd a1, 240(sp)
; RV64-i64-NEXT: flw fa5, 116(sp)
; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v14, v8, 7
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
; RV64-i64-NEXT: fcvt.l.s a1, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v11
+; RV64-i64-NEXT: vfmv.f.s fa4, v12
; RV64-i64-NEXT: fcvt.l.s a2, fa5
; RV64-i64-NEXT: sd a2, 232(sp)
; RV64-i64-NEXT: flw fa5, 112(sp)
; RV64-i64-NEXT: fcvt.l.s a2, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v12
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-i64-NEXT: vfmv.f.s fa4, v13
+; RV64-i64-NEXT: vslidedown.vi v12, v8, 6
; RV64-i64-NEXT: fcvt.l.s a3, fa5
; RV64-i64-NEXT: sd a3, 224(sp)
; RV64-i64-NEXT: flw fa5, 108(sp)
; RV64-i64-NEXT: fcvt.l.s a3, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v14
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 5
+; RV64-i64-NEXT: vfmv.f.s fa4, v10
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
; RV64-i64-NEXT: fcvt.l.s a4, fa5
; RV64-i64-NEXT: sd a4, 216(sp)
; RV64-i64-NEXT: flw fa5, 104(sp)
; RV64-i64-NEXT: fcvt.l.s a4, fa4
-; RV64-i64-NEXT: vfmv.f.s fa4, v10
+; RV64-i64-NEXT: vfmv.f.s fa4, v12
; RV64-i64-NEXT: fcvt.l.s a5, fa4
; RV64-i64-NEXT: fcvt.l.s a6, fa5
; RV64-i64-NEXT: sd a6, 208(sp)
; RV64-i64-NEXT: flw fa5, 100(sp)
-; RV64-i64-NEXT: vfmv.f.s fa4, v12
+; RV64-i64-NEXT: vfmv.f.s fa4, v10
; RV64-i64-NEXT: fcvt.l.s a6, fa4
; RV64-i64-NEXT: vslidedown.vi v8, v8, 4
; RV64-i64-NEXT: fcvt.l.s a7, fa5
@@ -683,15 +683,15 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; RV32-LABEL: lrint_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vslidedown.vi v12, v8, 1
; RV32-NEXT: vfmv.f.s fa5, v8
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
+; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.d a1, fa5
; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: fcvt.w.d a1, fa5
+; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: fcvt.w.d a0, fa5
@@ -707,15 +707,15 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; RV64-i32-LABEL: lrint_v4f64:
; RV64-i32: # %bb.0:
; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
; RV64-i32-NEXT: vfmv.f.s fa5, v8
; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.d a1, fa5
; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: fcvt.l.d a1, fa5
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-i32-NEXT: vmv.v.x v9, a0
; RV64-i32-NEXT: fcvt.l.d a0, fa5
@@ -731,15 +731,15 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; RV64-i64-LABEL: lrint_v4f64:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i64-NEXT: vslidedown.vi v12, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v8
; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 2
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
; RV64-i64-NEXT: vslidedown.vi v8, v8, 3
; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.d a1, fa5
; RV64-i64-NEXT: vfmv.f.s fa5, v12
+; RV64-i64-NEXT: fcvt.l.d a1, fa5
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-i64-NEXT: vmv.v.x v10, a0
; RV64-i64-NEXT: fcvt.l.d a0, fa5
@@ -768,17 +768,17 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
+; RV32-NEXT: vslidedown.vi v14, v8, 1
; RV32-NEXT: vfmv.f.s fa5, v8
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v14, v8, 2
+; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa4, v12
+; RV32-NEXT: vfmv.f.s fa4, v14
; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vfmv.f.s fa5, v14
+; RV32-NEXT: vfmv.f.s fa5, v12
; RV32-NEXT: vfmv.f.s fa3, v8
; RV32-NEXT: fcvt.w.d a1, fa4
; RV32-NEXT: fcvt.w.d a2, fa5
@@ -823,17 +823,17 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV64-i32-NEXT: andi sp, sp, -64
; RV64-i32-NEXT: mv a0, sp
; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
+; RV64-i32-NEXT: vslidedown.vi v14, v8, 1
; RV64-i32-NEXT: vfmv.f.s fa5, v8
; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v14, v8, 2
+; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
; RV64-i32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-i32-NEXT: vse64.v v8, (a0)
; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
-; RV64-i32-NEXT: vfmv.f.s fa4, v12
+; RV64-i32-NEXT: vfmv.f.s fa4, v14
; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vfmv.f.s fa5, v14
+; RV64-i32-NEXT: vfmv.f.s fa5, v12
; RV64-i32-NEXT: vfmv.f.s fa3, v8
; RV64-i32-NEXT: fcvt.l.d a1, fa4
; RV64-i32-NEXT: fcvt.l.d a2, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 837a8891dfeb9..76590d47a3230 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -3733,8 +3733,8 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_truemask_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32V-NEXT: vluxei32.v v10, (zero), v8
-; RV32V-NEXT: vmv.v.v v8, v10
+; RV32V-NEXT: vmv1r.v v10, v8
+; RV32V-NEXT: vluxei32.v v8, (zero), v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i64:
@@ -11010,8 +11010,8 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt
; RV32V-LABEL: mgather_truemask_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32V-NEXT: vluxei32.v v10, (zero), v8
-; RV32V-NEXT: vmv.v.v v8, v10
+; RV32V-NEXT: vmv1r.v v10, v8
+; RV32V-NEXT: vluxei32.v v8, (zero), v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 7354f9afa9a71..8f2672e8f40c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -12441,14 +12441,14 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v8, 16
-; RV64V-NEXT: vslidedown.vi v10, v10, 16
+; RV64V-NEXT: vslidedown.vi v16, v8, 16
+; RV64V-NEXT: vslidedown.vi v18, v10, 16
; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64V-NEXT: vslidedown.vi v0, v0, 2
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64V-NEXT: vsext.vf8 v16, v10
+; RV64V-NEXT: vsext.vf8 v8, v18
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64V-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index c6a8f2b50076e..2cebf14ec5aa4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -786,12 +786,12 @@ define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v8, a0
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v8, v8, v16
-; CHECK-NEXT: vfwredosum.vs v8, v24, v8
+; CHECK-NEXT: vfwredosum.vs v8, v8, v24
+; CHECK-NEXT: vfwredosum.vs v8, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1175,12 +1175,12 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v8, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v8, v8, v16
-; CHECK-NEXT: vfwredosum.vs v8, v24, v8
+; CHECK-NEXT: vfwredosum.vs v8, v8, v24
+; CHECK-NEXT: vfwredosum.vs v8, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 707d1202aca0f..3c0a4fd89660d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -1522,15 +1522,15 @@ define i64 @vwreduce_add_v64i64(ptr %x) {
; RV32-NEXT: vle32.v v16, (a0)
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v16, 16
+; RV32-NEXT: vslidedown.vi v24, v16, 16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vwadd.vv v24, v16, v8
+; RV32-NEXT: vwadd.vv v0, v16, v8
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vwadd.vv v16, v0, v8
+; RV32-NEXT: vwadd.vv v16, v24, v8
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v8, v24, v16
+; RV32-NEXT: vadd.vv v8, v0, v16
; RV32-NEXT: vmv.s.x v16, zero
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
@@ -1588,15 +1588,15 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) {
; RV32-NEXT: vle32.v v16, (a0)
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v16, 16
+; RV32-NEXT: vslidedown.vi v24, v16, 16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vwaddu.vv v24, v16, v8
+; RV32-NEXT: vwaddu.vv v0, v16, v8
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vwaddu.vv v16, v0, v8
+; RV32-NEXT: vwaddu.vv v16, v24, v8
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v8, v24, v16
+; RV32-NEXT: vadd.vv v8, v0, v16
; RV32-NEXT: vmv.s.x v16, zero
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
index 8da605d35270d..c0a213034c95b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
@@ -76,11 +76,11 @@ define signext i32 @sad_16x8_as_i32(<16 x i8> %a, <16 x i8> %b) {
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v12, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vredsum.vs v8, v12, v8
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
entry:
@@ -117,22 +117,22 @@ define signext i32 @sad_2block_16xi8_as_i32(ptr %a, ptr %b, i32 signext %stridea
; CHECK-NEXT: vle8.v v11, (a1)
; CHECK-NEXT: vsub.vv v10, v10, v14
; CHECK-NEXT: vminu.vv v14, v12, v13
-; CHECK-NEXT: vmaxu.vv v12, v12, v13
-; CHECK-NEXT: vwaddu.vv v16, v10, v8
-; CHECK-NEXT: vsub.vv v8, v12, v14
+; CHECK-NEXT: vmaxu.vv v15, v12, v13
+; CHECK-NEXT: vwaddu.vv v12, v10, v8
+; CHECK-NEXT: vsub.vv v8, v15, v14
; CHECK-NEXT: vminu.vv v10, v9, v11
; CHECK-NEXT: vmaxu.vv v9, v9, v11
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vzext.vf2 v14, v8
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vv v8, v9, v10
+; CHECK-NEXT: vsub.vv v16, v9, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v20, v12, v16
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vwaddu.wv v20, v20, v10
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
+; CHECK-NEXT: vzext.vf2 v12, v16
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vredsum.vs v8, v20, v8
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
index 6b81b781a898f..c0a352e160193 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
@@ -24,21 +24,21 @@ define <8 x float> @fpext_v8bf16(<8 x bfloat> %x) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a7
; CHECK-NEXT: vmv.s.x v9, a6
-; CHECK-NEXT: vmv.s.x v10, a5
-; CHECK-NEXT: vmv.s.x v12, a4
-; CHECK-NEXT: vmv.s.x v11, a3
+; CHECK-NEXT: vmv.s.x v11, a5
+; CHECK-NEXT: vmv.s.x v10, a4
+; CHECK-NEXT: vmv.s.x v12, a3
; CHECK-NEXT: vmv.s.x v13, a2
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: vmv.s.x v14, a1
-; CHECK-NEXT: vslideup.vi v12, v10, 1
-; CHECK-NEXT: vslideup.vi v13, v11, 1
+; CHECK-NEXT: vslideup.vi v10, v11, 1
+; CHECK-NEXT: vslideup.vi v13, v12, 1
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vslideup.vi v8, v14, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v12, v9, 2
+; CHECK-NEXT: vslideup.vi v10, v9, 2
; CHECK-NEXT: vslideup.vi v8, v13, 2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 4
+; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%y = fpext <8 x bfloat> %x to <8 x float>
ret <8 x float> %y
@@ -66,21 +66,21 @@ define <8 x float> @fpext_v8f16(<8 x bfloat> %x) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a7
; CHECK-NEXT: vmv.s.x v9, a6
-; CHECK-NEXT: vmv.s.x v10, a5
-; CHECK-NEXT: vmv.s.x v12, a4
-; CHECK-NEXT: vmv.s.x v11, a3
+; CHECK-NEXT: vmv.s.x v11, a5
+; CHECK-NEXT: vmv.s.x v10, a4
+; CHECK-NEXT: vmv.s.x v12, a3
; CHECK-NEXT: vmv.s.x v13, a2
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: vmv.s.x v14, a1
-; CHECK-NEXT: vslideup.vi v12, v10, 1
-; CHECK-NEXT: vslideup.vi v13, v11, 1
+; CHECK-NEXT: vslideup.vi v10, v11, 1
+; CHECK-NEXT: vslideup.vi v13, v12, 1
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vslideup.vi v8, v14, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v12, v9, 2
+; CHECK-NEXT: vslideup.vi v10, v9, 2
; CHECK-NEXT: vslideup.vi v8, v13, 2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 4
+; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%y = fpext <8 x bfloat> %x to <8 x float>
ret <8 x float> %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
index 90d4a5e511c87..8fef00dd597e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -412,12 +412,12 @@ define <8 x i1> @fcmp_one_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmflt.vv v9, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmflt.vv v13, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmor.mm v0, v13, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -439,12 +439,12 @@ define <8 x i1> @fcmp_one_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmflt.vv v13, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmor.mm v0, v13, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -493,14 +493,14 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v13, v10, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t
-; ZVFHMIN-NEXT: vmand.mm v0, v9, v8
+; ZVFHMIN-NEXT: vmfeq.vv v10, v8, v8, v0.t
+; ZVFHMIN-NEXT: vmand.mm v0, v13, v10
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -524,14 +524,14 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v13, v10, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t
-; ZVFHMIN-NEXT: vmand.mm v0, v8, v9
+; ZVFHMIN-NEXT: vmfeq.vv v10, v8, v8, v0.t
+; ZVFHMIN-NEXT: vmand.mm v0, v10, v13
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -576,12 +576,12 @@ define <8 x i1> @fcmp_ueq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmflt.vv v9, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmflt.vv v13, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmnor.mm v0, v13, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -603,12 +603,12 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmflt.vv v13, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmnor.mm v0, v13, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -649,12 +649,12 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmfle.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -674,12 +674,12 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmfle.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -720,12 +720,12 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -745,12 +745,12 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -791,12 +791,12 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmfle.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -816,12 +816,12 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmfle.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -862,12 +862,12 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -887,12 +887,12 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT: vmnot.m v0, v8
+; ZVFHMIN-NEXT: vmflt.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT: vmnot.m v0, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1009,14 +1009,14 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t
+; ZVFHMIN-NEXT: vmfne.vv v13, v10, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
-; ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; ZVFHMIN-NEXT: vmfne.vv v10, v8, v8, v0.t
+; ZVFHMIN-NEXT: vmor.mm v0, v13, v10
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1040,14 +1040,14 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t
+; ZVFHMIN-NEXT: vmfne.vv v13, v10, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
-; ZVFHMIN-NEXT: vmor.mm v0, v8, v9
+; ZVFHMIN-NEXT: vmfne.vv v10, v8, v8, v0.t
+; ZVFHMIN-NEXT: vmor.mm v0, v10, v13
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1133,7 +1133,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: addi s0, sp, 896
; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: li a2, 28
+; ZVFHMIN32-NEXT: li a2, 29
; ZVFHMIN32-NEXT: mul a1, a1, a2
; ZVFHMIN32-NEXT: sub sp, sp, a1
; ZVFHMIN32-NEXT: andi sp, sp, -128
@@ -1305,49 +1305,49 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 7
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 27
+; ZVFHMIN32-NEXT: li a3, 28
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 6
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 26
+; ZVFHMIN32-NEXT: li a3, 27
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 5
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 25
+; ZVFHMIN32-NEXT: li a3, 26
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 4
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 24
+; ZVFHMIN32-NEXT: li a3, 25
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 3
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 23
+; ZVFHMIN32-NEXT: li a3, 24
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 2
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 22
+; ZVFHMIN32-NEXT: li a3, 23
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 1
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 21
+; ZVFHMIN32-NEXT: li a3, 22
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
@@ -1359,8 +1359,8 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs2r.v v10, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v30, v8, 13
-; ZVFHMIN32-NEXT: vslidedown.vi v6, v8, 12
+; ZVFHMIN32-NEXT: vslidedown.vi v2, v8, 13
+; ZVFHMIN32-NEXT: vslidedown.vi v30, v8, 12
; ZVFHMIN32-NEXT: vslidedown.vi v28, v8, 11
; ZVFHMIN32-NEXT: vslidedown.vi v26, v8, 10
; ZVFHMIN32-NEXT: vslidedown.vi v22, v8, 9
@@ -1373,50 +1373,44 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 560(sp)
; ZVFHMIN32-NEXT: lh a1, 304(sp)
; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 7
-; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: slli a3, a2, 1
-; ZVFHMIN32-NEXT: add a2, a3, a2
-; ZVFHMIN32-NEXT: add a2, sp, a2
-; ZVFHMIN32-NEXT: addi a2, a2, 848
+; ZVFHMIN32-NEXT: vslidedown.vi v21, v16, 7
+; ZVFHMIN32-NEXT: vslidedown.vi v23, v16, 6
+; ZVFHMIN32-NEXT: vslidedown.vi v29, v16, 5
+; ZVFHMIN32-NEXT: vslidedown.vi v31, v16, 4
+; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 3
+; ZVFHMIN32-NEXT: addi a2, sp, 848
; ZVFHMIN32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v21, v16, 6
-; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 5
+; ZVFHMIN32-NEXT: vslidedown.vi v27, v16, 2
+; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 1
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 18
+; ZVFHMIN32-NEXT: li a3, 19
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v23, v16, 4
-; ZVFHMIN32-NEXT: vslidedown.vi v31, v16, 3
-; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 2
-; ZVFHMIN32-NEXT: addi a2, sp, 848
-; ZVFHMIN32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v7, v16, 1
; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; ZVFHMIN32-NEXT: vslidedown.vi v14, v16, 15
; ZVFHMIN32-NEXT: vslidedown.vi v12, v16, 14
; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 13
; ZVFHMIN32-NEXT: vslidedown.vi v18, v16, 12
; ZVFHMIN32-NEXT: vslidedown.vi v10, v16, 11
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v16, 10
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v16, 10
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 19
+; ZVFHMIN32-NEXT: li a3, 20
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v16, 9
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v16, 9
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 14
-; ZVFHMIN32-NEXT: mul a2, a2, a3
+; ZVFHMIN32-NEXT: slli a3, a2, 4
+; ZVFHMIN32-NEXT: sub a2, a3, a2
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
; ZVFHMIN32-NEXT: vslidedown.vi v16, v16, 8
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 12
+; ZVFHMIN32-NEXT: li a3, 13
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
@@ -1428,48 +1422,57 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 558(sp)
; ZVFHMIN32-NEXT: lh a1, 302(sp)
; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; ZVFHMIN32-NEXT: vslidedown.vi v29, v0, 7
+; ZVFHMIN32-NEXT: vslidedown.vi v9, v0, 7
; ZVFHMIN32-NEXT: vslidedown.vi v19, v0, 6
-; ZVFHMIN32-NEXT: vslidedown.vi v27, v0, 5
+; ZVFHMIN32-NEXT: vslidedown.vi v3, v0, 5
; ZVFHMIN32-NEXT: vslidedown.vi v13, v0, 4
; ZVFHMIN32-NEXT: vslidedown.vi v15, v0, 3
-; ZVFHMIN32-NEXT: vslidedown.vi v9, v0, 2
+; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 2
; ZVFHMIN32-NEXT: vslidedown.vi v11, v0, 1
; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
-; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 15
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v0, 14
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 15
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 6
-; ZVFHMIN32-NEXT: mul a2, a2, a3
+; ZVFHMIN32-NEXT: slli a3, a2, 1
+; ZVFHMIN32-NEXT: add a2, a3, a2
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v0, 13
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 14
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: slli a2, a2, 2
+; ZVFHMIN32-NEXT: slli a3, a2, 3
+; ZVFHMIN32-NEXT: sub a2, a3, a2
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v0, 12
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 13
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 10
+; ZVFHMIN32-NEXT: slli a3, a2, 2
+; ZVFHMIN32-NEXT: add a2, a3, a2
+; ZVFHMIN32-NEXT: add a2, sp, a2
+; ZVFHMIN32-NEXT: addi a2, a2, 848
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 12
+; ZVFHMIN32-NEXT: csrr a2, vlenb
+; ZVFHMIN32-NEXT: li a3, 11
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v0, 11
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 11
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: slli a2, a2, 3
+; ZVFHMIN32-NEXT: slli a3, a2, 3
+; ZVFHMIN32-NEXT: add a2, a3, a2
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v0, 10
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 10
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: slli a2, a2, 4
+; ZVFHMIN32-NEXT: slli a3, a2, 4
+; ZVFHMIN32-NEXT: add a2, a3, a2
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
-; ZVFHMIN32-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN32-NEXT: vslidedown.vi v2, v0, 9
+; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN32-NEXT: vslidedown.vi v6, v0, 9
; ZVFHMIN32-NEXT: vslidedown.vi v0, v0, 8
; ZVFHMIN32-NEXT: vmv.x.s t3, v4
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
@@ -1483,14 +1486,14 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vl2r.v v4, (a2) # Unknown-size Folded Reload
; ZVFHMIN32-NEXT: vmv.x.s t4, v4
-; ZVFHMIN32-NEXT: vmv.x.s t2, v30
+; ZVFHMIN32-NEXT: vmv.x.s t2, v2
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4
; ZVFHMIN32-NEXT: sb a0, 214(sp)
; ZVFHMIN32-NEXT: lh a0, 554(sp)
; ZVFHMIN32-NEXT: lh a1, 298(sp)
-; ZVFHMIN32-NEXT: vmv.x.s t1, v6
+; ZVFHMIN32-NEXT: vmv.x.s t1, v30
; ZVFHMIN32-NEXT: vmv.x.s t0, v28
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
@@ -1553,12 +1556,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh t5, 736(sp)
; ZVFHMIN32-NEXT: lh t6, 480(sp)
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 27
+; ZVFHMIN32-NEXT: li a1, 28
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s5, 848(a0) # 8-byte Folded Reload
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 26
+; ZVFHMIN32-NEXT: li a1, 27
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s6, 848(a0) # 8-byte Folded Reload
@@ -1569,12 +1572,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh t5, 734(sp)
; ZVFHMIN32-NEXT: lh t6, 478(sp)
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 25
+; ZVFHMIN32-NEXT: li a1, 26
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s7, 848(a0) # 8-byte Folded Reload
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 24
+; ZVFHMIN32-NEXT: li a1, 25
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s8, 848(a0) # 8-byte Folded Reload
@@ -1585,12 +1588,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh t5, 732(sp)
; ZVFHMIN32-NEXT: lh t6, 476(sp)
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 23
+; ZVFHMIN32-NEXT: li a1, 24
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s4, 848(a0) # 8-byte Folded Reload
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 22
+; ZVFHMIN32-NEXT: li a1, 23
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s3, 848(a0) # 8-byte Folded Reload
@@ -1601,23 +1604,19 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh t6, 730(sp)
; ZVFHMIN32-NEXT: lh s9, 474(sp)
; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: li a1, 21
+; ZVFHMIN32-NEXT: li a1, 22
; ZVFHMIN32-NEXT: mul a0, a0, a1
; ZVFHMIN32-NEXT: add a0, sp, a0
; ZVFHMIN32-NEXT: lh s2, 848(a0) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a1, a0, 1
-; ZVFHMIN32-NEXT: add a0, a1, a0
-; ZVFHMIN32-NEXT: add a0, sp, a0
-; ZVFHMIN32-NEXT: lh t5, 848(a0) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: vmv.x.s t5, v21
; ZVFHMIN32-NEXT: fmv.h.x fa5, t6
; ZVFHMIN32-NEXT: fmv.h.x fa4, s9
; ZVFHMIN32-NEXT: feq.h t6, fa5, fa4
; ZVFHMIN32-NEXT: sb t6, 173(sp)
; ZVFHMIN32-NEXT: lh s9, 728(sp)
; ZVFHMIN32-NEXT: lh s10, 472(sp)
-; ZVFHMIN32-NEXT: vmv.x.s t6, v21
-; ZVFHMIN32-NEXT: vmv.x.s ra, v29
+; ZVFHMIN32-NEXT: vmv.x.s t6, v23
+; ZVFHMIN32-NEXT: vmv.x.s ra, v9
; ZVFHMIN32-NEXT: fmv.h.x fa5, s9
; ZVFHMIN32-NEXT: fmv.h.x fa4, s10
; ZVFHMIN32-NEXT: feq.h s9, fa5, fa4
@@ -1625,7 +1624,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh s9, 726(sp)
; ZVFHMIN32-NEXT: lh s10, 470(sp)
; ZVFHMIN32-NEXT: vmv.x.s a2, v19
-; ZVFHMIN32-NEXT: vmv.x.s a3, v27
+; ZVFHMIN32-NEXT: vmv.x.s a3, v3
; ZVFHMIN32-NEXT: fmv.h.x fa5, s9
; ZVFHMIN32-NEXT: fmv.h.x fa4, s10
; ZVFHMIN32-NEXT: feq.h s9, fa5, fa4
@@ -1640,7 +1639,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: sb s10, 170(sp)
; ZVFHMIN32-NEXT: lh a0, 722(sp)
; ZVFHMIN32-NEXT: lh a1, 466(sp)
-; ZVFHMIN32-NEXT: vmv.x.s s10, v9
+; ZVFHMIN32-NEXT: vmv.x.s s10, v16
; ZVFHMIN32-NEXT: vmv.x.s s11, v11
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
@@ -1724,27 +1723,27 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: sb a1, 161(sp)
; ZVFHMIN32-NEXT: lh a0, 610(sp)
; ZVFHMIN32-NEXT: lh a1, 354(sp)
-; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 18
-; ZVFHMIN32-NEXT: mul a2, a2, a3
-; ZVFHMIN32-NEXT: add a2, sp, a2
-; ZVFHMIN32-NEXT: lh s6, 848(a2) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: vmv.x.s s5, v23
+; ZVFHMIN32-NEXT: vmv.x.s s6, v29
+; ZVFHMIN32-NEXT: vmv.x.s s5, v31
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4
; ZVFHMIN32-NEXT: sb a0, 241(sp)
; ZVFHMIN32-NEXT: lh a0, 608(sp)
; ZVFHMIN32-NEXT: lh a1, 352(sp)
-; ZVFHMIN32-NEXT: vmv.x.s s4, v31
-; ZVFHMIN32-NEXT: lh s3, 848(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: lh s4, 848(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: vmv.x.s s3, v27
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4
; ZVFHMIN32-NEXT: sb a0, 240(sp)
; ZVFHMIN32-NEXT: lh a0, 606(sp)
; ZVFHMIN32-NEXT: lh a1, 350(sp)
-; ZVFHMIN32-NEXT: vmv.x.s s2, v7
+; ZVFHMIN32-NEXT: csrr a2, vlenb
+; ZVFHMIN32-NEXT: li a3, 19
+; ZVFHMIN32-NEXT: mul a2, a2, a3
+; ZVFHMIN32-NEXT: add a2, sp, a2
+; ZVFHMIN32-NEXT: lh s2, 848(a2) # 8-byte Folded Reload
; ZVFHMIN32-NEXT: fmv.h.x fa5, t5
; ZVFHMIN32-NEXT: fmv.h.x fa4, a0
; ZVFHMIN32-NEXT: fmv.h.x fa3, a1
@@ -1879,15 +1878,15 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 766(sp)
; ZVFHMIN32-NEXT: lh a1, 510(sp)
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 19
+; ZVFHMIN32-NEXT: li a3, 20
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN32-NEXT: vmv.x.s s2, v8
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 14
-; ZVFHMIN32-NEXT: mul a2, a2, a3
+; ZVFHMIN32-NEXT: slli a3, a2, 4
+; ZVFHMIN32-NEXT: sub a2, a3, a2
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
@@ -1899,13 +1898,19 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 764(sp)
; ZVFHMIN32-NEXT: lh a1, 508(sp)
; ZVFHMIN32-NEXT: csrr a2, vlenb
-; ZVFHMIN32-NEXT: li a3, 12
+; ZVFHMIN32-NEXT: li a3, 13
; ZVFHMIN32-NEXT: mul a2, a2, a3
; ZVFHMIN32-NEXT: add a2, sp, a2
; ZVFHMIN32-NEXT: addi a2, a2, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN32-NEXT: vmv.x.s t6, v8
-; ZVFHMIN32-NEXT: vmv.x.s a2, v16
+; ZVFHMIN32-NEXT: csrr a2, vlenb
+; ZVFHMIN32-NEXT: slli a3, a2, 1
+; ZVFHMIN32-NEXT: add a2, a3, a2
+; ZVFHMIN32-NEXT: add a2, sp, a2
+; ZVFHMIN32-NEXT: addi a2, a2, 848
+; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4
@@ -1913,14 +1918,15 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 762(sp)
; ZVFHMIN32-NEXT: lh a1, 506(sp)
; ZVFHMIN32-NEXT: csrr a3, vlenb
-; ZVFHMIN32-NEXT: li a4, 6
-; ZVFHMIN32-NEXT: mul a3, a3, a4
+; ZVFHMIN32-NEXT: slli a4, a3, 3
+; ZVFHMIN32-NEXT: sub a3, a4, a3
; ZVFHMIN32-NEXT: add a3, sp, a3
; ZVFHMIN32-NEXT: addi a3, a3, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload
; ZVFHMIN32-NEXT: vmv.x.s a3, v8
; ZVFHMIN32-NEXT: csrr a4, vlenb
-; ZVFHMIN32-NEXT: slli a4, a4, 2
+; ZVFHMIN32-NEXT: slli s3, a4, 2
+; ZVFHMIN32-NEXT: add a4, s3, a4
; ZVFHMIN32-NEXT: add a4, sp, a4
; ZVFHMIN32-NEXT: addi a4, a4, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload
@@ -1932,14 +1938,15 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 760(sp)
; ZVFHMIN32-NEXT: lh a1, 504(sp)
; ZVFHMIN32-NEXT: csrr s3, vlenb
-; ZVFHMIN32-NEXT: li s4, 10
+; ZVFHMIN32-NEXT: li s4, 11
; ZVFHMIN32-NEXT: mul s3, s3, s4
; ZVFHMIN32-NEXT: add s3, sp, s3
; ZVFHMIN32-NEXT: addi s3, s3, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload
; ZVFHMIN32-NEXT: vmv.x.s s6, v8
; ZVFHMIN32-NEXT: csrr s3, vlenb
-; ZVFHMIN32-NEXT: slli s3, s3, 3
+; ZVFHMIN32-NEXT: slli s4, s3, 3
+; ZVFHMIN32-NEXT: add s3, s4, s3
; ZVFHMIN32-NEXT: add s3, sp, s3
; ZVFHMIN32-NEXT: addi s3, s3, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload
@@ -1951,12 +1958,13 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN32-NEXT: lh a0, 758(sp)
; ZVFHMIN32-NEXT: lh a1, 502(sp)
; ZVFHMIN32-NEXT: csrr s3, vlenb
-; ZVFHMIN32-NEXT: slli s3, s3, 4
+; ZVFHMIN32-NEXT: slli s5, s3, 4
+; ZVFHMIN32-NEXT: add s3, s5, s3
; ZVFHMIN32-NEXT: add s3, sp, s3
; ZVFHMIN32-NEXT: addi s3, s3, 848
; ZVFHMIN32-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload
; ZVFHMIN32-NEXT: vmv.x.s s5, v8
-; ZVFHMIN32-NEXT: vmv.x.s s3, v2
+; ZVFHMIN32-NEXT: vmv.x.s s3, v6
; ZVFHMIN32-NEXT: fmv.h.x fa5, a0
; ZVFHMIN32-NEXT: fmv.h.x fa4, a1
; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4
@@ -2253,7 +2261,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: addi s0, sp, 896
; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: li a2, 28
+; ZVFHMIN64-NEXT: li a2, 29
; ZVFHMIN64-NEXT: mul a1, a1, a2
; ZVFHMIN64-NEXT: sub sp, sp, a1
; ZVFHMIN64-NEXT: andi sp, sp, -128
@@ -2425,49 +2433,49 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 7
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 27
+; ZVFHMIN64-NEXT: li a3, 28
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 6
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 26
+; ZVFHMIN64-NEXT: li a3, 27
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 5
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 25
+; ZVFHMIN64-NEXT: li a3, 26
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 4
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 24
+; ZVFHMIN64-NEXT: li a3, 25
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 3
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 23
+; ZVFHMIN64-NEXT: li a3, 24
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 2
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 22
+; ZVFHMIN64-NEXT: li a3, 23
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 1
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 21
+; ZVFHMIN64-NEXT: li a3, 22
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
@@ -2479,8 +2487,8 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs2r.v v10, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v30, v8, 13
-; ZVFHMIN64-NEXT: vslidedown.vi v6, v8, 12
+; ZVFHMIN64-NEXT: vslidedown.vi v2, v8, 13
+; ZVFHMIN64-NEXT: vslidedown.vi v30, v8, 12
; ZVFHMIN64-NEXT: vslidedown.vi v28, v8, 11
; ZVFHMIN64-NEXT: vslidedown.vi v26, v8, 10
; ZVFHMIN64-NEXT: vslidedown.vi v22, v8, 9
@@ -2493,50 +2501,44 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 560(sp)
; ZVFHMIN64-NEXT: lh a1, 304(sp)
; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 7
-; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: slli a3, a2, 1
-; ZVFHMIN64-NEXT: add a2, a3, a2
-; ZVFHMIN64-NEXT: add a2, sp, a2
-; ZVFHMIN64-NEXT: addi a2, a2, 800
+; ZVFHMIN64-NEXT: vslidedown.vi v21, v16, 7
+; ZVFHMIN64-NEXT: vslidedown.vi v23, v16, 6
+; ZVFHMIN64-NEXT: vslidedown.vi v29, v16, 5
+; ZVFHMIN64-NEXT: vslidedown.vi v31, v16, 4
+; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 3
+; ZVFHMIN64-NEXT: addi a2, sp, 800
; ZVFHMIN64-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v21, v16, 6
-; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 5
+; ZVFHMIN64-NEXT: vslidedown.vi v27, v16, 2
+; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 1
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 18
+; ZVFHMIN64-NEXT: li a3, 19
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v23, v16, 4
-; ZVFHMIN64-NEXT: vslidedown.vi v31, v16, 3
-; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 2
-; ZVFHMIN64-NEXT: addi a2, sp, 800
-; ZVFHMIN64-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v7, v16, 1
; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; ZVFHMIN64-NEXT: vslidedown.vi v14, v16, 15
; ZVFHMIN64-NEXT: vslidedown.vi v12, v16, 14
; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 13
; ZVFHMIN64-NEXT: vslidedown.vi v18, v16, 12
; ZVFHMIN64-NEXT: vslidedown.vi v10, v16, 11
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v16, 10
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v16, 10
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 19
+; ZVFHMIN64-NEXT: li a3, 20
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v16, 9
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v16, 9
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 14
-; ZVFHMIN64-NEXT: mul a2, a2, a3
+; ZVFHMIN64-NEXT: slli a3, a2, 4
+; ZVFHMIN64-NEXT: sub a2, a3, a2
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
; ZVFHMIN64-NEXT: vslidedown.vi v16, v16, 8
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 12
+; ZVFHMIN64-NEXT: li a3, 13
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
@@ -2548,48 +2550,57 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 558(sp)
; ZVFHMIN64-NEXT: lh a1, 302(sp)
; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; ZVFHMIN64-NEXT: vslidedown.vi v29, v0, 7
+; ZVFHMIN64-NEXT: vslidedown.vi v9, v0, 7
; ZVFHMIN64-NEXT: vslidedown.vi v19, v0, 6
-; ZVFHMIN64-NEXT: vslidedown.vi v27, v0, 5
+; ZVFHMIN64-NEXT: vslidedown.vi v3, v0, 5
; ZVFHMIN64-NEXT: vslidedown.vi v13, v0, 4
; ZVFHMIN64-NEXT: vslidedown.vi v15, v0, 3
-; ZVFHMIN64-NEXT: vslidedown.vi v9, v0, 2
+; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 2
; ZVFHMIN64-NEXT: vslidedown.vi v11, v0, 1
; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
-; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 15
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v0, 14
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 15
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 6
-; ZVFHMIN64-NEXT: mul a2, a2, a3
+; ZVFHMIN64-NEXT: slli a3, a2, 1
+; ZVFHMIN64-NEXT: add a2, a3, a2
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v0, 13
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 14
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: slli a2, a2, 2
+; ZVFHMIN64-NEXT: slli a3, a2, 3
+; ZVFHMIN64-NEXT: sub a2, a3, a2
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v0, 12
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 13
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 10
+; ZVFHMIN64-NEXT: slli a3, a2, 2
+; ZVFHMIN64-NEXT: add a2, a3, a2
+; ZVFHMIN64-NEXT: add a2, sp, a2
+; ZVFHMIN64-NEXT: addi a2, a2, 800
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 12
+; ZVFHMIN64-NEXT: csrr a2, vlenb
+; ZVFHMIN64-NEXT: li a3, 11
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v0, 11
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 11
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: slli a2, a2, 3
+; ZVFHMIN64-NEXT: slli a3, a2, 3
+; ZVFHMIN64-NEXT: add a2, a3, a2
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v0, 10
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 10
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: slli a2, a2, 4
+; ZVFHMIN64-NEXT: slli a3, a2, 4
+; ZVFHMIN64-NEXT: add a2, a3, a2
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
-; ZVFHMIN64-NEXT: vs2r.v v2, (a2) # Unknown-size Folded Spill
-; ZVFHMIN64-NEXT: vslidedown.vi v2, v0, 9
+; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill
+; ZVFHMIN64-NEXT: vslidedown.vi v6, v0, 9
; ZVFHMIN64-NEXT: vslidedown.vi v0, v0, 8
; ZVFHMIN64-NEXT: vmv.x.s t3, v4
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
@@ -2603,14 +2614,14 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vl2r.v v4, (a2) # Unknown-size Folded Reload
; ZVFHMIN64-NEXT: vmv.x.s t4, v4
-; ZVFHMIN64-NEXT: vmv.x.s t2, v30
+; ZVFHMIN64-NEXT: vmv.x.s t2, v2
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4
; ZVFHMIN64-NEXT: sb a0, 214(sp)
; ZVFHMIN64-NEXT: lh a0, 554(sp)
; ZVFHMIN64-NEXT: lh a1, 298(sp)
-; ZVFHMIN64-NEXT: vmv.x.s t1, v6
+; ZVFHMIN64-NEXT: vmv.x.s t1, v30
; ZVFHMIN64-NEXT: vmv.x.s t0, v28
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
@@ -2673,12 +2684,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh t5, 736(sp)
; ZVFHMIN64-NEXT: lh t6, 480(sp)
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 27
+; ZVFHMIN64-NEXT: li a1, 28
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s5, 800(a0) # 8-byte Folded Reload
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 26
+; ZVFHMIN64-NEXT: li a1, 27
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s6, 800(a0) # 8-byte Folded Reload
@@ -2689,12 +2700,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh t5, 734(sp)
; ZVFHMIN64-NEXT: lh t6, 478(sp)
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 25
+; ZVFHMIN64-NEXT: li a1, 26
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s7, 800(a0) # 8-byte Folded Reload
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 24
+; ZVFHMIN64-NEXT: li a1, 25
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s8, 800(a0) # 8-byte Folded Reload
@@ -2705,12 +2716,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh t5, 732(sp)
; ZVFHMIN64-NEXT: lh t6, 476(sp)
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 23
+; ZVFHMIN64-NEXT: li a1, 24
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s4, 800(a0) # 8-byte Folded Reload
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 22
+; ZVFHMIN64-NEXT: li a1, 23
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s3, 800(a0) # 8-byte Folded Reload
@@ -2721,23 +2732,19 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh t6, 730(sp)
; ZVFHMIN64-NEXT: lh s9, 474(sp)
; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: li a1, 21
+; ZVFHMIN64-NEXT: li a1, 22
; ZVFHMIN64-NEXT: mul a0, a0, a1
; ZVFHMIN64-NEXT: add a0, sp, a0
; ZVFHMIN64-NEXT: lh s2, 800(a0) # 8-byte Folded Reload
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a1, a0, 1
-; ZVFHMIN64-NEXT: add a0, a1, a0
-; ZVFHMIN64-NEXT: add a0, sp, a0
-; ZVFHMIN64-NEXT: lh t5, 800(a0) # 8-byte Folded Reload
+; ZVFHMIN64-NEXT: vmv.x.s t5, v21
; ZVFHMIN64-NEXT: fmv.h.x fa5, t6
; ZVFHMIN64-NEXT: fmv.h.x fa4, s9
; ZVFHMIN64-NEXT: feq.h t6, fa5, fa4
; ZVFHMIN64-NEXT: sb t6, 173(sp)
; ZVFHMIN64-NEXT: lh s9, 728(sp)
; ZVFHMIN64-NEXT: lh s10, 472(sp)
-; ZVFHMIN64-NEXT: vmv.x.s t6, v21
-; ZVFHMIN64-NEXT: vmv.x.s ra, v29
+; ZVFHMIN64-NEXT: vmv.x.s t6, v23
+; ZVFHMIN64-NEXT: vmv.x.s ra, v9
; ZVFHMIN64-NEXT: fmv.h.x fa5, s9
; ZVFHMIN64-NEXT: fmv.h.x fa4, s10
; ZVFHMIN64-NEXT: feq.h s9, fa5, fa4
@@ -2745,7 +2752,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh s9, 726(sp)
; ZVFHMIN64-NEXT: lh s10, 470(sp)
; ZVFHMIN64-NEXT: vmv.x.s a2, v19
-; ZVFHMIN64-NEXT: vmv.x.s a3, v27
+; ZVFHMIN64-NEXT: vmv.x.s a3, v3
; ZVFHMIN64-NEXT: fmv.h.x fa5, s9
; ZVFHMIN64-NEXT: fmv.h.x fa4, s10
; ZVFHMIN64-NEXT: feq.h s9, fa5, fa4
@@ -2760,7 +2767,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: sb s10, 170(sp)
; ZVFHMIN64-NEXT: lh a0, 722(sp)
; ZVFHMIN64-NEXT: lh a1, 466(sp)
-; ZVFHMIN64-NEXT: vmv.x.s s10, v9
+; ZVFHMIN64-NEXT: vmv.x.s s10, v16
; ZVFHMIN64-NEXT: vmv.x.s s11, v11
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
@@ -2844,27 +2851,27 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: sb a1, 161(sp)
; ZVFHMIN64-NEXT: lh a0, 610(sp)
; ZVFHMIN64-NEXT: lh a1, 354(sp)
-; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 18
-; ZVFHMIN64-NEXT: mul a2, a2, a3
-; ZVFHMIN64-NEXT: add a2, sp, a2
-; ZVFHMIN64-NEXT: lh s6, 800(a2) # 8-byte Folded Reload
-; ZVFHMIN64-NEXT: vmv.x.s s5, v23
+; ZVFHMIN64-NEXT: vmv.x.s s6, v29
+; ZVFHMIN64-NEXT: vmv.x.s s5, v31
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4
; ZVFHMIN64-NEXT: sb a0, 241(sp)
; ZVFHMIN64-NEXT: lh a0, 608(sp)
; ZVFHMIN64-NEXT: lh a1, 352(sp)
-; ZVFHMIN64-NEXT: vmv.x.s s4, v31
-; ZVFHMIN64-NEXT: lh s3, 800(sp) # 8-byte Folded Reload
+; ZVFHMIN64-NEXT: lh s4, 800(sp) # 8-byte Folded Reload
+; ZVFHMIN64-NEXT: vmv.x.s s3, v27
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4
; ZVFHMIN64-NEXT: sb a0, 240(sp)
; ZVFHMIN64-NEXT: lh a0, 606(sp)
; ZVFHMIN64-NEXT: lh a1, 350(sp)
-; ZVFHMIN64-NEXT: vmv.x.s s2, v7
+; ZVFHMIN64-NEXT: csrr a2, vlenb
+; ZVFHMIN64-NEXT: li a3, 19
+; ZVFHMIN64-NEXT: mul a2, a2, a3
+; ZVFHMIN64-NEXT: add a2, sp, a2
+; ZVFHMIN64-NEXT: lh s2, 800(a2) # 8-byte Folded Reload
; ZVFHMIN64-NEXT: fmv.h.x fa5, t5
; ZVFHMIN64-NEXT: fmv.h.x fa4, a0
; ZVFHMIN64-NEXT: fmv.h.x fa3, a1
@@ -2999,15 +3006,15 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 766(sp)
; ZVFHMIN64-NEXT: lh a1, 510(sp)
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 19
+; ZVFHMIN64-NEXT: li a3, 20
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN64-NEXT: vmv.x.s s2, v8
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 14
-; ZVFHMIN64-NEXT: mul a2, a2, a3
+; ZVFHMIN64-NEXT: slli a3, a2, 4
+; ZVFHMIN64-NEXT: sub a2, a3, a2
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
@@ -3019,13 +3026,19 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 764(sp)
; ZVFHMIN64-NEXT: lh a1, 508(sp)
; ZVFHMIN64-NEXT: csrr a2, vlenb
-; ZVFHMIN64-NEXT: li a3, 12
+; ZVFHMIN64-NEXT: li a3, 13
; ZVFHMIN64-NEXT: mul a2, a2, a3
; ZVFHMIN64-NEXT: add a2, sp, a2
; ZVFHMIN64-NEXT: addi a2, a2, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN64-NEXT: vmv.x.s t6, v8
-; ZVFHMIN64-NEXT: vmv.x.s a2, v16
+; ZVFHMIN64-NEXT: csrr a2, vlenb
+; ZVFHMIN64-NEXT: slli a3, a2, 1
+; ZVFHMIN64-NEXT: add a2, a3, a2
+; ZVFHMIN64-NEXT: add a2, sp, a2
+; ZVFHMIN64-NEXT: addi a2, a2, 800
+; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4
@@ -3033,14 +3046,15 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 762(sp)
; ZVFHMIN64-NEXT: lh a1, 506(sp)
; ZVFHMIN64-NEXT: csrr a3, vlenb
-; ZVFHMIN64-NEXT: li a4, 6
-; ZVFHMIN64-NEXT: mul a3, a3, a4
+; ZVFHMIN64-NEXT: slli a4, a3, 3
+; ZVFHMIN64-NEXT: sub a3, a4, a3
; ZVFHMIN64-NEXT: add a3, sp, a3
; ZVFHMIN64-NEXT: addi a3, a3, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload
; ZVFHMIN64-NEXT: vmv.x.s a3, v8
; ZVFHMIN64-NEXT: csrr a4, vlenb
-; ZVFHMIN64-NEXT: slli a4, a4, 2
+; ZVFHMIN64-NEXT: slli s3, a4, 2
+; ZVFHMIN64-NEXT: add a4, s3, a4
; ZVFHMIN64-NEXT: add a4, sp, a4
; ZVFHMIN64-NEXT: addi a4, a4, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload
@@ -3052,14 +3066,15 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 760(sp)
; ZVFHMIN64-NEXT: lh a1, 504(sp)
; ZVFHMIN64-NEXT: csrr s3, vlenb
-; ZVFHMIN64-NEXT: li s4, 10
+; ZVFHMIN64-NEXT: li s4, 11
; ZVFHMIN64-NEXT: mul s3, s3, s4
; ZVFHMIN64-NEXT: add s3, sp, s3
; ZVFHMIN64-NEXT: addi s3, s3, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload
; ZVFHMIN64-NEXT: vmv.x.s s6, v8
; ZVFHMIN64-NEXT: csrr s3, vlenb
-; ZVFHMIN64-NEXT: slli s3, s3, 3
+; ZVFHMIN64-NEXT: slli s4, s3, 3
+; ZVFHMIN64-NEXT: add s3, s4, s3
; ZVFHMIN64-NEXT: add s3, sp, s3
; ZVFHMIN64-NEXT: addi s3, s3, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload
@@ -3071,12 +3086,13 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFHMIN64-NEXT: lh a0, 758(sp)
; ZVFHMIN64-NEXT: lh a1, 502(sp)
; ZVFHMIN64-NEXT: csrr s3, vlenb
-; ZVFHMIN64-NEXT: slli s3, s3, 4
+; ZVFHMIN64-NEXT: slli s5, s3, 4
+; ZVFHMIN64-NEXT: add s3, s5, s3
; ZVFHMIN64-NEXT: add s3, sp, s3
; ZVFHMIN64-NEXT: addi s3, s3, 800
; ZVFHMIN64-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload
; ZVFHMIN64-NEXT: vmv.x.s s5, v8
-; ZVFHMIN64-NEXT: vmv.x.s s3, v2
+; ZVFHMIN64-NEXT: vmv.x.s s3, v6
; ZVFHMIN64-NEXT: fmv.h.x fa5, a0
; ZVFHMIN64-NEXT: fmv.h.x fa4, a1
; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
index d1980ee3b0a6f..d8dc1f3588633 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
@@ -56,8 +56,8 @@ define <4 x i64> @vsext_v4i64_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl)
; CHECK-LABEL: vsext_v4i64_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf8 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.sext.v4i64.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -67,8 +67,8 @@ define <4 x i64> @vsext_v4i64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsext_v4i64_v4i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf8 v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.sext.v4i64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
@@ -104,8 +104,8 @@ define <4 x i64> @vsext_v4i64_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl
; CHECK-LABEL: vsext_v4i64_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.sext.v4i64.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -115,8 +115,8 @@ define <4 x i64> @vsext_v4i64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsext_v4i64_v4i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.sext.v4i64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
@@ -128,8 +128,8 @@ define <4 x i64> @vsext_v4i64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl
; CHECK-LABEL: vsext_v4i64_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -139,8 +139,8 @@ define <4 x i64> @vsext_v4i64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vsext_v4i64_v4i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
@@ -153,24 +153,25 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v0, 2
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v24, v8, v0.t
+; CHECK-NEXT: vsext.vf2 v16, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 16
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vsext.vf2 v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl)
ret <32 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index 8d11ecacfa41e..c05f306424519 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -212,17 +212,17 @@ define <16 x i1> @v16i1_v8i1(<8 x i1>) {
define <8 x i32> @v8i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v8i32_v4i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v11, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: vslidedown.vx v10, v9, a0
+; CHECK-NEXT: vslidedown.vx v8, v11, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v11, v8, v10
-; CHECK-NEXT: vrgatherei16.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vrgatherei16.vv v9, v10, v8
+; CHECK-NEXT: vrgatherei16.vv v8, v10, v11
; CHECK-NEXT: ret
%2 = shufflevector <4 x i32> %0, <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3>
ret <8 x i32> %2
@@ -275,59 +275,59 @@ define <16 x i32> @v16i32_v4i32(<4 x i32>) {
define <32 x i32> @v32i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v32i32_v4i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v16, v8
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: lui a1, 135432
; CHECK-NEXT: addi a1, a1, 1161
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: lui a1, 270865
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 3
+; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: addi a1, a1, 548
-; CHECK-NEXT: vmerge.vim v10, v10, 2, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: lui a1, 100550
; CHECK-NEXT: addi a1, a1, 64
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v10
+; CHECK-NEXT: vsext.vf2 v12, v8
; CHECK-NEXT: vslidedown.vx v20, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v17, v8, v20
+; CHECK-NEXT: vrgatherei16.vv v9, v16, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vslidedown.vx v20, v20, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v16, v8, v12
+; CHECK-NEXT: vrgatherei16.vv v8, v16, v12
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vslidedown.vx v12, v20, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v18, v8, v20
+; CHECK-NEXT: vrgatherei16.vv v10, v16, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v12, a1
+; CHECK-NEXT: vslidedown.vx v20, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v19, v8, v12
+; CHECK-NEXT: vrgatherei16.vv v11, v16, v12
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v24, a1
+; CHECK-NEXT: vslidedown.vx v24, v20, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v20, v8, v24
+; CHECK-NEXT: vrgatherei16.vv v12, v16, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v12, a1
+; CHECK-NEXT: vslidedown.vx v20, v24, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v21, v8, v12
-; CHECK-NEXT: vrgatherei16.vv v22, v8, v24
+; CHECK-NEXT: vrgatherei16.vv v13, v16, v24
+; CHECK-NEXT: vrgatherei16.vv v14, v16, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v24, a1
+; CHECK-NEXT: vslidedown.vx v20, v20, a1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v23, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vrgatherei16.vv v15, v16, v20
; CHECK-NEXT: ret
%2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
ret <32 x i32> %2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index d06ce0a817f9e..9629b3547b3d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -70,17 +70,17 @@ define <8 x i32> @m2_pair_swap_vl8(<8 x i32> %v1) vscale_range(2,2) {
; RV32-LABEL: m2_pair_swap_vl8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 32
; RV32-NEXT: li a1, 63
-; RV32-NEXT: vwsubu.vx v12, v10, a0
+; RV32-NEXT: vwsubu.vx v10, v12, a0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vand.vx v10, v10, a1
-; RV32-NEXT: vsrl.vv v12, v8, v12
-; RV32-NEXT: vsll.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vsrl.vv v10, v8, v10
+; RV32-NEXT: vsll.vv v8, v8, v12
+; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: m2_pair_swap_vl8:
@@ -183,17 +183,17 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 252
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vmv.v.i v0, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v11, v11, v8, v0
+; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT: addi a0, a1, 672
-; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: ret
%1 = getelementptr i32, ptr %explicit_0, i64 63
%2 = load <3 x i32>, ptr %1, align 1
@@ -338,20 +338,20 @@ define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) {
; RV32-LABEL: multi_chunks_shuffle:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 32
; RV32-NEXT: li a1, 63
-; RV32-NEXT: vwsubu.vx v12, v10, a0
+; RV32-NEXT: vwsubu.vx v10, v12, a0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: lui a0, 61681
; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a1
; RV32-NEXT: vand.vx v10, v10, a1
-; RV32-NEXT: vsrl.vv v12, v8, v12
-; RV32-NEXT: vsll.vv v8, v8, v10
+; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vsrl.vv v10, v8, v10
+; RV32-NEXT: vsll.vv v8, v8, v12
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
index c222626a166fe..a6d5fd3e0e51a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -134,15 +134,15 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) {
; NO-ZVBB-NEXT: slli a1, a1, 2
; NO-ZVBB-NEXT: vmerge.vim v8, v8, 1, v0
; NO-ZVBB-NEXT: vsetvli a3, zero, e16, m2, ta, ma
-; NO-ZVBB-NEXT: vrsub.vx v12, v12, a2
+; NO-ZVBB-NEXT: vrsub.vx v16, v12, a2
; NO-ZVBB-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; NO-ZVBB-NEXT: vrgatherei16.vv v19, v8, v12
-; NO-ZVBB-NEXT: vrgatherei16.vv v18, v9, v12
-; NO-ZVBB-NEXT: vrgatherei16.vv v17, v10, v12
-; NO-ZVBB-NEXT: vrgatherei16.vv v16, v11, v12
+; NO-ZVBB-NEXT: vrgatherei16.vv v15, v8, v16
+; NO-ZVBB-NEXT: vrgatherei16.vv v14, v9, v16
+; NO-ZVBB-NEXT: vrgatherei16.vv v13, v10, v16
+; NO-ZVBB-NEXT: vrgatherei16.vv v12, v11, v16
; NO-ZVBB-NEXT: addi a1, a1, -64
; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; NO-ZVBB-NEXT: vslidedown.vx v8, v16, a1
+; NO-ZVBB-NEXT: vslidedown.vx v8, v12, a1
; NO-ZVBB-NEXT: vmsne.vi v0, v8, 0
; NO-ZVBB-NEXT: ret
;
@@ -161,14 +161,14 @@ define <128 x i1> @reverse_v128i1(<128 x i1> %a) {
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
-; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vid.v v24
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: addi a2, a1, -1
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e16, m2, ta, ma
-; CHECK-NEXT: vrsub.vx v24, v8, a2
+; CHECK-NEXT: vrsub.vx v24, v24, a2
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vrgatherei16.vv v15, v16, v24
; CHECK-NEXT: vrgatherei16.vv v14, v17, v24
@@ -281,16 +281,16 @@ define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: vrsub.vx v12, v12, a1
+; CHECK-NEXT: vrsub.vx v16, v12, a1
; CHECK-NEXT: addi a0, a0, -64
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v19, v8, v12
-; CHECK-NEXT: vrgatherei16.vv v18, v9, v12
-; CHECK-NEXT: vrgatherei16.vv v17, v10, v12
-; CHECK-NEXT: vrgatherei16.vv v16, v11, v12
+; CHECK-NEXT: vrgatherei16.vv v15, v8, v16
+; CHECK-NEXT: vrgatherei16.vv v14, v9, v16
+; CHECK-NEXT: vrgatherei16.vv v13, v10, v16
+; CHECK-NEXT: vrgatherei16.vv v12, v11, v16
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v12, a0
; CHECK-NEXT: ret
%res = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <64 x i8> %res
@@ -356,12 +356,12 @@ define <16 x i16> @reverse_v16i16(<16 x i16> %a) {
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -16
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x i16> %res
@@ -377,14 +377,14 @@ define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, -32
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v15, v8, v16
+; CHECK-NEXT: vrgather.vv v14, v9, v16
+; CHECK-NEXT: vrgather.vv v13, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v11, v16
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v12, a0
; CHECK-NEXT: ret
%res = shufflevector <32 x i16> %a, <32 x i16> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i16> %res
@@ -438,12 +438,12 @@ define <8 x i32> @reverse_v8i32(<8 x i32> %a) {
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i32> %res
@@ -499,12 +499,12 @@ define <4 x i64> @reverse_v4i64(<4 x i64> %a) {
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i64> %res
@@ -519,14 +519,14 @@ define <8 x i64> @reverse_v8i64(<8 x i64> %a) {
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v15, v8, v16
+; CHECK-NEXT: vrgather.vv v14, v9, v16
+; CHECK-NEXT: vrgather.vv v13, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v11, v16
; CHECK-NEXT: addi a0, a0, -8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v12, a0
; CHECK-NEXT: ret
%res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i64> %res
@@ -593,12 +593,12 @@ define <16 x half> @reverse_v16f16(<16 x half> %a) {
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -16
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x half> %res
@@ -614,14 +614,14 @@ define <32 x half> @reverse_v32f16(<32 x half> %a) {
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, -32
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v15, v8, v16
+; CHECK-NEXT: vrgather.vv v14, v9, v16
+; CHECK-NEXT: vrgather.vv v13, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v11, v16
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v12, a0
; CHECK-NEXT: ret
%res = shufflevector <32 x half> %a, <32 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x half> %res
@@ -675,12 +675,12 @@ define <8 x float> @reverse_v8f32(<8 x float> %a) {
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %res
@@ -736,12 +736,12 @@ define <4 x double> @reverse_v4f64(<4 x double> %a) {
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x double> %res
@@ -756,14 +756,14 @@ define <8 x double> @reverse_v8f64(<8 x double> %a) {
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v15, v8, v16
+; CHECK-NEXT: vrgather.vv v14, v9, v16
+; CHECK-NEXT: vrgather.vv v13, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v11, v16
; CHECK-NEXT: addi a0, a0, -8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v12, a0
; CHECK-NEXT: ret
%res = shufflevector <8 x double> %a, <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x double> %res
@@ -871,12 +871,12 @@ define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v10, v10, 15
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: vrsub.vi v11, v10, 15
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vrgather.vv v8, v9, v11
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 16
+; CHECK-NEXT: vslideup.vi v8, v10, 16
; CHECK-NEXT: ret
%res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i8> %res
@@ -926,11 +926,11 @@ define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v10, v10, 7
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: vrsub.vi v11, v10, 7
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vrgather.vv v8, v9, v11
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 8
+; CHECK-NEXT: vslideup.vi v8, v10, 8
; CHECK-NEXT: ret
%res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x i16> %res
@@ -945,14 +945,14 @@ define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: addi a0, a0, -16
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v15, v8, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v14, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v13, v8, v16
+; CHECK-NEXT: vrgather.vv v15, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v9, v16
+; CHECK-NEXT: vrgather.vv v14, v11, v16
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v14, a0
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v12, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v14, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v12, 16
@@ -990,11 +990,11 @@ define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v10, v10, 3
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: vrsub.vi v11, v10, 3
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vrgather.vv v8, v9, v11
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 4
+; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i32> %res
@@ -1010,14 +1010,14 @@ define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, -8
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v15, v8, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v14, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v13, v8, v16
+; CHECK-NEXT: vrgather.vv v15, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v9, v16
+; CHECK-NEXT: vrgather.vv v14, v11, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v14, a0
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v12, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v14, a0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v12, 8
; CHECK-NEXT: ret
@@ -1035,20 +1035,20 @@ define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-NEXT: addi a0, a0, -16
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vrsub.vx v24, v16, a1
-; CHECK-NEXT: vrgather.vv v23, v8, v24
-; CHECK-NEXT: vrgather.vv v19, v12, v24
-; CHECK-NEXT: vrgather.vv v22, v9, v24
-; CHECK-NEXT: vrgather.vv v18, v13, v24
-; CHECK-NEXT: vrgather.vv v21, v10, v24
-; CHECK-NEXT: vrgather.vv v17, v14, v24
-; CHECK-NEXT: vrgather.vv v20, v11, v24
-; CHECK-NEXT: vrgather.vv v16, v15, v24
+; CHECK-NEXT: vrgather.vv v19, v8, v24
+; CHECK-NEXT: vrgather.vv v23, v12, v24
+; CHECK-NEXT: vrgather.vv v18, v9, v24
+; CHECK-NEXT: vrgather.vv v22, v13, v24
+; CHECK-NEXT: vrgather.vv v17, v10, v24
+; CHECK-NEXT: vrgather.vv v21, v14, v24
+; CHECK-NEXT: vrgather.vv v16, v11, v24
+; CHECK-NEXT: vrgather.vv v20, v15, v24
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v20, a0
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v16, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v20, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v24, 16
+; CHECK-NEXT: vslideup.vi v8, v16, 16
; CHECK-NEXT: ret
%res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i32> %res
@@ -1079,14 +1079,14 @@ define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, -4
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v15, v8, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v14, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v13, v8, v16
+; CHECK-NEXT: vrgather.vv v15, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v9, v16
+; CHECK-NEXT: vrgather.vv v14, v11, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v14, a0
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v12, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v14, a0
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v12, 4
; CHECK-NEXT: ret
@@ -1138,11 +1138,11 @@ define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v10, v10, 7
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: vrsub.vi v11, v10, 7
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vrgather.vv v8, v9, v11
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 8
+; CHECK-NEXT: vslideup.vi v8, v10, 8
; CHECK-NEXT: ret
%res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x half> %res
@@ -1200,11 +1200,11 @@ define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v10, v10, 3
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: vrsub.vi v11, v10, 3
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vrgather.vv v8, v9, v11
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 4
+; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %res
@@ -1220,14 +1220,14 @@ define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, -8
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v15, v8, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v14, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v13, v8, v16
+; CHECK-NEXT: vrgather.vv v15, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v9, v16
+; CHECK-NEXT: vrgather.vv v14, v11, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v14, a0
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v12, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v14, a0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v12, 8
; CHECK-NEXT: ret
@@ -1260,14 +1260,14 @@ define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, -4
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v15, v8, v12
-; CHECK-NEXT: vrgather.vv v17, v10, v12
-; CHECK-NEXT: vrgather.vv v14, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v11, v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v13, v8, v16
+; CHECK-NEXT: vrgather.vv v15, v10, v16
+; CHECK-NEXT: vrgather.vv v12, v9, v16
+; CHECK-NEXT: vrgather.vv v14, v11, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v14, a0
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v12, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v14, a0
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v12, 4
; CHECK-NEXT: ret
@@ -1340,12 +1340,12 @@ define <8 x i32> @reverse_v8i32_undef_suffix(<8 x i32> %a) {
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %res
@@ -1360,12 +1360,12 @@ define <8 x i32> @reverse_v8i32_undef_prefix(<8 x i32> %a) {
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v13, v8, v10
-; CHECK-NEXT: vrgather.vv v12, v9, v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
; CHECK-NEXT: addi a0, a0, -8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: ret
%res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
ret <8 x i32> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index 5b8e312a06ad4..fe2072990e2ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -615,17 +615,17 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) {
; RV32-LABEL: shuffle_v8i32_as_i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 32
; RV32-NEXT: li a1, 63
-; RV32-NEXT: vwsubu.vx v12, v10, a0
+; RV32-NEXT: vwsubu.vx v10, v12, a0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vand.vx v10, v10, a1
-; RV32-NEXT: vsrl.vv v12, v8, v12
-; RV32-NEXT: vsll.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vsrl.vv v10, v8, v10
+; RV32-NEXT: vsll.vv v8, v8, v12
+; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: shuffle_v8i32_as_i64:
@@ -819,17 +819,17 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) {
; RV32-LABEL: shuffle_v8f32_as_i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 32
; RV32-NEXT: li a1, 63
-; RV32-NEXT: vwsubu.vx v12, v10, a0
+; RV32-NEXT: vwsubu.vx v10, v12, a0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vand.vx v10, v10, a1
-; RV32-NEXT: vsrl.vv v12, v8, v12
-; RV32-NEXT: vsll.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vsrl.vv v10, v8, v10
+; RV32-NEXT: vsll.vv v8, v8, v12
+; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: shuffle_v8f32_as_i64:
@@ -864,17 +864,17 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2)
; RV32-LABEL: shuffle_v8f32_as_i64_exact:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 32
; RV32-NEXT: li a1, 63
-; RV32-NEXT: vwsubu.vx v12, v10, a0
+; RV32-NEXT: vwsubu.vx v10, v12, a0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vand.vx v10, v10, a1
-; RV32-NEXT: vsrl.vv v12, v8, v12
-; RV32-NEXT: vsll.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vsrl.vv v10, v8, v10
+; RV32-NEXT: vsll.vv v8, v8, v12
+; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: shuffle_v8f32_as_i64_exact:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
index 0928a47757430..9279e0a4d3a6c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
@@ -559,10 +559,10 @@ define void @vnsrl_0_i32_single_src_m8_2(ptr %in, ptr %out) {
; ZVE32F-NEXT: lui a0, 349525
; ZVE32F-NEXT: addi a0, a0, 1365
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; ZVE32F-NEXT: vmv.v.x v16, a0
+; ZVE32F-NEXT: vmv.v.x v24, a0
; ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; ZVE32F-NEXT: vcompress.vm v24, v8, v16
-; ZVE32F-NEXT: vse32.v v24, (a1)
+; ZVE32F-NEXT: vcompress.vm v16, v8, v24
+; ZVE32F-NEXT: vse32.v v16, (a1)
; ZVE32F-NEXT: ret
entry:
%0 = load <64 x i32>, ptr %in, align 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
index a2d41de5d1853..9f1f98893d04c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
@@ -345,8 +345,8 @@ define <4 x double> @vsitofp_v4f64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext
; CHECK-LABEL: vsitofp_v4f64_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
@@ -356,8 +356,8 @@ define <4 x double> @vsitofp_v4f64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %ev
; CHECK-LABEL: vsitofp_v4f64_v4i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
index a0d5d2ccc848d..b72e3cfcb920a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
@@ -345,8 +345,8 @@ define <4 x double> @vuitofp_v4f64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext
; CHECK-LABEL: vuitofp_v4f64_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
@@ -356,8 +356,8 @@ define <4 x double> @vuitofp_v4f64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %ev
; CHECK-LABEL: vuitofp_v4f64_v4i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 6d9f69f436fc4..29fbb8acc3358 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -365,13 +365,13 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: beqz a0, .LBB6_4
; RV64-SLOW-NEXT: .LBB6_8: # %cond.store5
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 3
+; RV64-SLOW-NEXT: vslidedown.vi v12, v8, 3
; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-SLOW-NEXT: vslidedown.vi v10, v10, 3
+; RV64-SLOW-NEXT: vslidedown.vi v8, v10, 3
; RV64-SLOW-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-SLOW-NEXT: vmv.x.s a0, v8
+; RV64-SLOW-NEXT: vmv.x.s a0, v12
; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-SLOW-NEXT: vmv.x.s a1, v10
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
; RV64-SLOW-NEXT: srli a2, a0, 8
; RV64-SLOW-NEXT: sb a0, 0(a1)
; RV64-SLOW-NEXT: sb a2, 1(a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
index f80c158324684..8a8fe234cacd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
@@ -278,12 +278,12 @@ define <8 x half> @vfadd_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -304,12 +304,12 @@ define <8 x half> @vfadd_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -374,12 +374,12 @@ define <16 x half> @vfadd_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -400,12 +400,12 @@ define <16 x half> @vfadd_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
index 23baa60de1532..30f509436214a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
@@ -278,12 +278,12 @@ define <8 x half> @vfdiv_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -304,12 +304,12 @@ define <8 x half> @vfdiv_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -374,12 +374,12 @@ define <16 x half> @vfdiv_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -400,12 +400,12 @@ define <16 x half> @vfdiv_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index 11420a23285d0..c37df892de442 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -188,11 +188,11 @@ define <8 x half> @vfmax_v8f16_vf(<8 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -213,11 +213,11 @@ define <8 x half> @vfmax_v8f16_fv(<8 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT: vfmax.vv v10, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -261,11 +261,11 @@ define <16 x half> @vfmax_v16f16_vf(<16 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -286,11 +286,11 @@ define <16 x half> @vfmax_v16f16_fv(<16 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT: vfmax.vv v12, v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index e8ae32a45f7cd..90afe36a36c0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -188,11 +188,11 @@ define <8 x half> @vfmin_v8f16_vf(<8 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -213,11 +213,11 @@ define <8 x half> @vfmin_v8f16_fv(<8 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT: vfmin.vv v10, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -261,11 +261,11 @@ define <16 x half> @vfmin_v16f16_vf(<16 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -286,11 +286,11 @@ define <16 x half> @vfmin_v16f16_fv(<16 x half> %a, half %b) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT: vfmin.vv v12, v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
index 7e03a3cf95577..3c0819e549552 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
@@ -278,12 +278,12 @@ define <8 x half> @vfmul_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -304,12 +304,12 @@ define <8 x half> @vfmul_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfmul.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -374,12 +374,12 @@ define <16 x half> @vfmul_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -400,12 +400,12 @@ define <16 x half> @vfmul_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfmul.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll
index 59fd8bbd17953..b8a6be40b3f32 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll
@@ -59,8 +59,8 @@ define <8 x float> @vfpext_v8f16_v8f32(<8 x half> %va) strictfp {
; CHECK-LABEL: vfpext_v8f16_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
ret <8 x float> %evec
@@ -96,8 +96,8 @@ define <4 x double> @vfpext_v4f32_v4f64(<4 x float> %va) strictfp {
; CHECK-LABEL: vfpext_v4f32_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
ret <4 x double> %evec
@@ -108,8 +108,8 @@ define <8 x double> @vfpext_v8f32_v8f64(<8 x float> %va) strictfp {
; CHECK-LABEL: vfpext_v8f32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
ret <8 x double> %evec
@@ -170,8 +170,8 @@ define <8 x float> @vfpext_v8bf16_v8f32(<8 x bfloat> %va) strictfp {
; CHECK-LABEL: vfpext_v8bf16_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8bf16(<8 x bfloat> %va, metadata !"fpexcept.strict")
ret <8 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll
index 5ac0d8d120cba..ac58a597a0812 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll
@@ -485,8 +485,8 @@ define <8 x i32> @vfptosi_v8f16_v8i32(<8 x half> %va) strictfp {
; CHECK-LABEL: vfptosi_v8f16_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
ret <8 x i32> %evec
@@ -497,8 +497,8 @@ define <8 x i32> @vfptoui_v8f16_v8i32(<8 x half> %va) strictfp {
; CHECK-LABEL: vfptoui_v8f16_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
ret <8 x i32> %evec
@@ -607,8 +607,8 @@ define <16 x i32> @vfptosi_v16f16_v16i32(<16 x half> %va) strictfp {
; CHECK-LABEL: vfptosi_v16f16_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
ret <16 x i32> %evec
@@ -619,8 +619,8 @@ define <16 x i32> @vfptoui_v16f16_v16i32(<16 x half> %va) strictfp {
; CHECK-LABEL: vfptoui_v16f16_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
ret <16 x i32> %evec
@@ -1051,8 +1051,8 @@ define <4 x i64> @vfptosi_v4f32_v4i64(<4 x float> %va) strictfp {
; CHECK-LABEL: vfptosi_v4f32_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
ret <4 x i64> %evec
@@ -1063,8 +1063,8 @@ define <4 x i64> @vfptoui_v4f32_v4i64(<4 x float> %va) strictfp {
; CHECK-LABEL: vfptoui_v4f32_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
ret <4 x i64> %evec
@@ -1173,8 +1173,8 @@ define <8 x i64> @vfptosi_v8f32_v8i64(<8 x float> %va) strictfp {
; CHECK-LABEL: vfptosi_v8f32_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
ret <8 x i64> %evec
@@ -1185,8 +1185,8 @@ define <8 x i64> @vfptoui_v8f32_v8i64(<8 x float> %va) strictfp {
; CHECK-LABEL: vfptoui_v8f32_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
ret <8 x i64> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
index 58a510047d625..0f3a6de4f4a90 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
@@ -278,12 +278,12 @@ define <8 x half> @vfsub_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -304,12 +304,12 @@ define <8 x half> @vfsub_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfsub.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -374,12 +374,12 @@ define <16 x half> @vfsub_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -400,12 +400,12 @@ define <16 x half> @vfsub_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll
index a48be7687106e..a9e9b757f372e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll
@@ -1868,11 +1868,11 @@ define <8 x double> @vfwmacc_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, ha
; CHECK-LABEL: vfwmacc_vf_v8f64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v13, fa0
+; CHECK-NEXT: vfmv.v.f v16, fa0
; CHECK-NEXT: vfwcvt.f.f.v v14, v12
-; CHECK-NEXT: vfwcvt.f.f.v v16, v13
+; CHECK-NEXT: vfwcvt.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwmacc.vv v8, v14, v16
+; CHECK-NEXT: vfwmacc.vv v8, v14, v12
; CHECK-NEXT: ret
%head = insertelement <8 x half> poison, half %c, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1903,11 +1903,11 @@ define <8 x double> @vfwnmacc_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
; CHECK-LABEL: vfwnmacc_vf_v8f64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v13, fa0
+; CHECK-NEXT: vfmv.v.f v16, fa0
; CHECK-NEXT: vfwcvt.f.f.v v14, v12
-; CHECK-NEXT: vfwcvt.f.f.v v16, v13
+; CHECK-NEXT: vfwcvt.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwnmacc.vv v8, v14, v16
+; CHECK-NEXT: vfwnmacc.vv v8, v14, v12
; CHECK-NEXT: ret
%head = insertelement <8 x half> poison, half %c, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1923,11 +1923,11 @@ define <8 x double> @vfwnmacc_fv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
; CHECK-LABEL: vfwnmacc_fv_v8f64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v13, fa0
+; CHECK-NEXT: vfmv.v.f v16, fa0
; CHECK-NEXT: vfwcvt.f.f.v v14, v12
-; CHECK-NEXT: vfwcvt.f.f.v v16, v13
+; CHECK-NEXT: vfwcvt.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwnmacc.vv v8, v14, v16
+; CHECK-NEXT: vfwnmacc.vv v8, v14, v12
; CHECK-NEXT: ret
%head = insertelement <8 x half> poison, half %c, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1959,11 +1959,11 @@ define <8 x double> @vfwmsac_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, ha
; CHECK-LABEL: vfwmsac_vf_v8f64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v13, fa0
+; CHECK-NEXT: vfmv.v.f v16, fa0
; CHECK-NEXT: vfwcvt.f.f.v v14, v12
-; CHECK-NEXT: vfwcvt.f.f.v v16, v13
+; CHECK-NEXT: vfwcvt.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwmsac.vv v8, v14, v16
+; CHECK-NEXT: vfwmsac.vv v8, v14, v12
; CHECK-NEXT: ret
%head = insertelement <8 x half> poison, half %c, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1994,11 +1994,11 @@ define <8 x double> @vfwnmsac_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
; CHECK-LABEL: vfwnmsac_vf_v8f64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v13, fa0
+; CHECK-NEXT: vfmv.v.f v16, fa0
; CHECK-NEXT: vfwcvt.f.f.v v14, v12
-; CHECK-NEXT: vfwcvt.f.f.v v16, v13
+; CHECK-NEXT: vfwcvt.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwnmsac.vv v8, v14, v16
+; CHECK-NEXT: vfwnmsac.vv v8, v14, v12
; CHECK-NEXT: ret
%head = insertelement <8 x half> poison, half %c, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -2013,11 +2013,11 @@ define <8 x double> @vfwnmsac_fv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
; CHECK-LABEL: vfwnmsac_fv_v8f64_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v13, fa0
+; CHECK-NEXT: vfmv.v.f v16, fa0
; CHECK-NEXT: vfwcvt.f.f.v v14, v12
-; CHECK-NEXT: vfwcvt.f.f.v v16, v13
+; CHECK-NEXT: vfwcvt.f.f.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwnmsac.vv v8, v14, v16
+; CHECK-NEXT: vfwnmsac.vv v8, v14, v12
; CHECK-NEXT: ret
%head = insertelement <8 x half> poison, half %c, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
index 5eb54fc7e299a..ce5483e3ae8cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
@@ -725,8 +725,8 @@ define <16 x half> @vsitofp_v16i8_v16f16(<16 x i8> %va) strictfp {
; CHECK-LABEL: vsitofp_v16i8_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x half> %evec
@@ -737,8 +737,8 @@ define <16 x half> @vuitofp_v16i8_v16f16(<16 x i8> %va) strictfp {
; CHECK-LABEL: vuitofp_v16i8_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x half> %evec
@@ -772,10 +772,11 @@ declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8>
define <32 x half> @vsitofp_v32i8_v32f16(<32 x i8> %va) strictfp {
; CHECK-LABEL: vsitofp_v32i8_v32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <32 x half> %evec
@@ -785,10 +786,11 @@ declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8>
define <32 x half> @vuitofp_v32i8_v32f16(<32 x i8> %va) strictfp {
; CHECK-LABEL: vuitofp_v32i8_v32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <32 x half> %evec
@@ -1031,8 +1033,8 @@ define <8 x float> @vsitofp_v8i16_v8f32(<8 x i16> %va) strictfp {
; CHECK-LABEL: vsitofp_v8i16_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %evec
@@ -1043,8 +1045,8 @@ define <8 x float> @vuitofp_v8i16_v8f32(<8 x i16> %va) strictfp {
; CHECK-LABEL: vuitofp_v8i16_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %evec
@@ -1101,8 +1103,8 @@ define <16 x float> @vsitofp_v16i16_v16f32(<16 x i16> %va) strictfp {
; CHECK-LABEL: vsitofp_v16i16_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i16(<16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %evec
@@ -1113,8 +1115,8 @@ define <16 x float> @vuitofp_v16i16_v16f32(<16 x i16> %va) strictfp {
; CHECK-LABEL: vuitofp_v16i16_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i16(<16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %evec
@@ -1335,8 +1337,8 @@ define <4 x double> @vsitofp_v4i32_v4f64(<4 x i32> %va) strictfp {
; CHECK-LABEL: vsitofp_v4i32_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %evec
@@ -1347,8 +1349,8 @@ define <4 x double> @vuitofp_v4i32_v4f64(<4 x i32> %va) strictfp {
; CHECK-LABEL: vuitofp_v4i32_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %evec
@@ -1405,8 +1407,8 @@ define <8 x double> @vsitofp_v8i32_v8f64(<8 x i32> %va) strictfp {
; CHECK-LABEL: vsitofp_v8i32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %evec
@@ -1417,8 +1419,8 @@ define <8 x double> @vuitofp_v8i32_v8f64(<8 x i32> %va) strictfp {
; CHECK-LABEL: vuitofp_v8i32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index 64f1819f0e329..83a195a66a502 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -875,8 +875,8 @@ define <4 x i64> @vpgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl)
; RV32-LABEL: vpgather_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_v4i64:
@@ -892,8 +892,8 @@ define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vluxei32.v v10, (zero), v8
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_v4i64:
@@ -911,8 +911,8 @@ define <8 x i64> @vpgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl)
; RV32-LABEL: vpgather_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_v8i64:
@@ -1757,8 +1757,8 @@ define <4 x double> @vpgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %e
; RV32-LABEL: vpgather_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_v4f64:
@@ -1774,8 +1774,8 @@ define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl)
; RV32-LABEL: vpgather_truemask_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vluxei32.v v10, (zero), v8
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_v4f64:
@@ -1793,8 +1793,8 @@ define <8 x double> @vpgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %e
; RV32-LABEL: vpgather_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_v8f64:
@@ -2117,12 +2117,12 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs,
; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 16
+; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: li a3, 16
-; RV64-NEXT: vsext.vf8 v16, v10
-; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vsext.vf8 v8, v16
+; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: mv a2, a1
; RV64-NEXT: bltu a1, a3, .LBB95_2
@@ -2177,11 +2177,11 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 16
+; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v8
-; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vsext.vf8 v8, v16
+; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: mv a2, a1
; RV64-NEXT: bltu a1, a3, .LBB96_2
@@ -2293,12 +2293,12 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs
; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 16
+; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: li a3, 16
-; RV64-NEXT: vsext.vf4 v16, v12
-; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vsext.vf4 v8, v16
+; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: mv a2, a1
; RV64-NEXT: bltu a1, a3, .LBB98_2
@@ -2353,11 +2353,11 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16>
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 16
+; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v8
-; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vsext.vf4 v8, v16
+; RV64-NEXT: vsll.vi v16, v8, 3
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: mv a2, a1
; RV64-NEXT: bltu a1, a3, .LBB99_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
index 5975b0d0761eb..32ae81926bbee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
@@ -1016,12 +1016,12 @@ define <4 x i64> @vrol_vx_v4i64(<4 x i64> %a, i64 %b) {
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vmv.v.x v10, a0
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vwsub.vx v14, v12, a0
+; RV32-NEXT: vmv.v.i v14, 0
+; RV32-NEXT: vwsub.vx v12, v14, a0
; RV32-NEXT: li a0, 63
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v12, v14, a0
+; RV32-NEXT: vand.vx v12, v12, a0
; RV32-NEXT: vsrl.vv v12, v8, v12
; RV32-NEXT: vsll.vv v8, v8, v10
; RV32-NEXT: vor.vv v8, v8, v12
@@ -1081,12 +1081,12 @@ define <8 x i64> @vrol_vx_v8i64(<8 x i64> %a, i64 %b) {
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: vwsub.vx v20, v16, a0
+; RV32-NEXT: vmv.v.i v20, 0
+; RV32-NEXT: vwsub.vx v16, v20, a0
; RV32-NEXT: li a0, 63
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v16, v20, a0
+; RV32-NEXT: vand.vx v16, v16, a0
; RV32-NEXT: vsrl.vv v16, v8, v16
; RV32-NEXT: vsll.vv v8, v8, v12
; RV32-NEXT: vor.vv v8, v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
index 68a9e217ccd1c..ec22d2be1eaad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
@@ -1833,12 +1833,12 @@ define <4 x i64> @vror_vx_v4i64(<4 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v10, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v12, 0
-; CHECK-RV32-NEXT: vwsub.vx v14, v12, a0
+; CHECK-RV32-NEXT: vmv.v.i v14, 0
+; CHECK-RV32-NEXT: vwsub.vx v12, v14, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vand.vx v12, v14, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
; CHECK-RV32-NEXT: vor.vv v8, v8, v12
@@ -1872,17 +1872,17 @@ define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) {
; CHECK-RV32-LABEL: vror_vi_v4i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v10, 0
+; CHECK-RV32-NEXT: vmv.v.i v12, 0
; CHECK-RV32-NEXT: li a0, 1
-; CHECK-RV32-NEXT: vwsubu.vx v12, v10, a0
+; CHECK-RV32-NEXT: vwsubu.vx v10, v12, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v10, a0
-; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vand.vi v10, v10, 1
-; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
-; CHECK-RV32-NEXT: vor.vv v8, v8, v12
+; CHECK-RV32-NEXT: vmv.v.x v12, a0
+; CHECK-RV32-NEXT: vand.vx v10, v10, a0
+; CHECK-RV32-NEXT: vand.vi v12, v12, 1
+; CHECK-RV32-NEXT: vsll.vv v10, v8, v10
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
+; CHECK-RV32-NEXT: vor.vv v8, v8, v10
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: vror_vi_v4i64:
@@ -1907,17 +1907,17 @@ define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) {
; CHECK-RV32-LABEL: vror_vi_rotl_v4i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v10, 0
+; CHECK-RV32-NEXT: vmv.v.i v12, 0
; CHECK-RV32-NEXT: li a0, 1
-; CHECK-RV32-NEXT: vwsubu.vx v12, v10, a0
+; CHECK-RV32-NEXT: vwsubu.vx v10, v12, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v10, a0
-; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vand.vi v10, v10, 1
-; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v10
-; CHECK-RV32-NEXT: vor.vv v8, v8, v12
+; CHECK-RV32-NEXT: vmv.v.x v12, a0
+; CHECK-RV32-NEXT: vand.vx v10, v10, a0
+; CHECK-RV32-NEXT: vand.vi v12, v12, 1
+; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
+; CHECK-RV32-NEXT: vor.vv v8, v8, v10
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: vror_vi_rotl_v4i64:
@@ -1969,12 +1969,12 @@ define <8 x i64> @vror_vx_v8i64(<8 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v12, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v16, 0
-; CHECK-RV32-NEXT: vwsub.vx v20, v16, a0
+; CHECK-RV32-NEXT: vmv.v.i v20, 0
+; CHECK-RV32-NEXT: vwsub.vx v16, v20, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vand.vx v16, v20, a0
+; CHECK-RV32-NEXT: vand.vx v16, v16, a0
; CHECK-RV32-NEXT: vsll.vv v16, v8, v16
; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
; CHECK-RV32-NEXT: vor.vv v8, v8, v16
@@ -2008,17 +2008,17 @@ define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) {
; CHECK-RV32-LABEL: vror_vi_v8i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v12, 0
+; CHECK-RV32-NEXT: vmv.v.i v16, 0
; CHECK-RV32-NEXT: li a0, 1
-; CHECK-RV32-NEXT: vwsubu.vx v16, v12, a0
+; CHECK-RV32-NEXT: vwsubu.vx v12, v16, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v12, a0
-; CHECK-RV32-NEXT: vand.vx v16, v16, a0
-; CHECK-RV32-NEXT: vand.vi v12, v12, 1
-; CHECK-RV32-NEXT: vsll.vv v16, v8, v16
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
-; CHECK-RV32-NEXT: vor.vv v8, v8, v16
+; CHECK-RV32-NEXT: vmv.v.x v16, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
+; CHECK-RV32-NEXT: vand.vi v16, v16, 1
+; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16
+; CHECK-RV32-NEXT: vor.vv v8, v8, v12
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: vror_vi_v8i64:
@@ -2043,17 +2043,17 @@ define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) {
; CHECK-RV32-LABEL: vror_vi_rotl_v8i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v12, 0
+; CHECK-RV32-NEXT: vmv.v.i v16, 0
; CHECK-RV32-NEXT: li a0, 1
-; CHECK-RV32-NEXT: vwsubu.vx v16, v12, a0
+; CHECK-RV32-NEXT: vwsubu.vx v12, v16, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v12, a0
-; CHECK-RV32-NEXT: vand.vx v16, v16, a0
-; CHECK-RV32-NEXT: vand.vi v12, v12, 1
-; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
-; CHECK-RV32-NEXT: vor.vv v8, v8, v16
+; CHECK-RV32-NEXT: vmv.v.x v16, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
+; CHECK-RV32-NEXT: vand.vi v16, v16, 1
+; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v16
+; CHECK-RV32-NEXT: vor.vv v8, v8, v12
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: vror_vi_rotl_v8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
index d241b78e41391..7bac239cfffea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
@@ -5,12 +5,13 @@
define <8 x i64> @vwadd_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwadd_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
-; CHECK-NEXT: vwadd.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwadd.wv v8, v8, v16, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -22,12 +23,13 @@ define <8 x i64> @vwadd_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
define <8 x i64> @vwaddu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwaddu_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
-; CHECK-NEXT: vwaddu.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwaddu.wv v8, v8, v16, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -39,13 +41,13 @@ define <8 x i64> @vwaddu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
define <8 x i64> @vwaddu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vwaddu_vv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vvm v14, v10, v8, v0
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -58,12 +60,13 @@ define <8 x i64> @vwaddu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
define <8 x i64> @vwadd_wv_mask_v8i32_commutative(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwadd_wv_mask_v8i32_commutative:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
-; CHECK-NEXT: vwadd.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwadd.wv v8, v8, v16, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
index 98f246b8741dc..feafde0114a7d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -900,11 +900,12 @@ define <4 x i64> @crash(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: crash:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsext.vf4 v8, v11
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v9
-; CHECK-NEXT: vwaddu.wv v10, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vzext.vf2 v11, v10
+; CHECK-NEXT: vwaddu.wv v8, v8, v11
; CHECK-NEXT: ret
%a = sext <4 x i16> %x to <4 x i64>
%b = zext <4 x i16> %y to <4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
index 7b12a56b78661..2c9aed6274dd8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
@@ -20,8 +20,9 @@ define <4 x i64> @vwsll_vv_v4i64_sext(<4 x i32> %a, <4 x i32> %b) {
; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i32> %a to <4 x i64>
%y = sext <4 x i32> %b to <4 x i64>
@@ -41,8 +42,9 @@ define <4 x i64> @vwsll_vv_v4i64_zext(<4 x i32> %a, <4 x i32> %b) {
; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i32> %a to <4 x i64>
%y = zext <4 x i32> %b to <4 x i64>
@@ -61,8 +63,8 @@ define <4 x i64> @vwsll_vx_i64_v4i64(<4 x i32> %a, i64 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer
@@ -75,19 +77,19 @@ define <4 x i64> @vwsll_vx_i32_v4i64_sext(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: vwsll_vx_i32_v4i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vmv.v.x v11, a0
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v10, v11
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -101,18 +103,18 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: vwsll_vx_i32_v4i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -126,18 +128,18 @@ define <4 x i64> @vwsll_vx_i16_v4i64_sext(<4 x i32> %a, i16 %b) {
; CHECK-LABEL: vwsll_vx_i16_v4i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -151,18 +153,18 @@ define <4 x i64> @vwsll_vx_i16_v4i64_zext(<4 x i32> %a, i16 %b) {
; CHECK-LABEL: vwsll_vx_i16_v4i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -176,18 +178,18 @@ define <4 x i64> @vwsll_vx_i8_v4i64_sext(<4 x i32> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v4i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -201,18 +203,18 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v4i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -225,17 +227,17 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) {
define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) {
; CHECK-LABEL: vwsll_vi_v4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_v4i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i32> %a to <4 x i64>
%z = shl <4 x i64> %x, splat (i64 2)
@@ -258,8 +260,9 @@ define <8 x i32> @vwsll_vv_v8i32_sext(<8 x i16> %a, <8 x i16> %b) {
; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <8 x i16> %a to <8 x i32>
%y = sext <8 x i16> %b to <8 x i32>
@@ -279,8 +282,9 @@ define <8 x i32> @vwsll_vv_v8i32_zext(<8 x i16> %a, <8 x i16> %b) {
; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <8 x i16> %a to <8 x i32>
%y = zext <8 x i16> %b to <8 x i32>
@@ -299,8 +303,8 @@ define <8 x i32> @vwsll_vx_i64_v8i32(<8 x i16> %a, i64 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i64_v8i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i64> poison, i64 %b, i32 0
%splat = shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -321,8 +325,8 @@ define <8 x i32> @vwsll_vx_i32_v8i32(<8 x i16> %a, i32 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v8i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <8 x i32> %head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -335,19 +339,19 @@ define <8 x i32> @vwsll_vx_i16_v8i32_sext(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: vwsll_vx_i16_v8i32_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vmv.v.x v11, a0
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v10, v11
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -361,18 +365,18 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: vwsll_vx_i16_v8i32_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -386,18 +390,18 @@ define <8 x i32> @vwsll_vx_i8_v8i32_sext(<8 x i16> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v8i32_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -411,18 +415,18 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v8i32_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -435,17 +439,17 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) {
define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) {
; CHECK-LABEL: vwsll_vi_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_v8i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2
; CHECK-ZVBB-NEXT: ret
%x = zext <8 x i16> %a to <8 x i32>
%z = shl <8 x i32> %x, splat (i32 2)
@@ -468,8 +472,9 @@ define <16 x i16> @vwsll_vv_v16i16_sext(<16 x i8> %a, <16 x i8> %b) {
; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <16 x i8> %a to <16 x i16>
%y = sext <16 x i8> %b to <16 x i16>
@@ -489,8 +494,9 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) {
; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <16 x i8> %a to <16 x i16>
%y = zext <16 x i8> %b to <16 x i16>
@@ -506,12 +512,12 @@ define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgather.vi v24, v16, 0
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vzext.vf2 v10, v8
+; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vnsrl.wi v12, v24, 0
+; RV32-NEXT: vnsrl.wi v8, v24, 0
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vnsrl.wi v8, v12, 0
-; RV32-NEXT: vsll.vv v8, v10, v8
+; RV32-NEXT: vnsrl.wi v14, v8, 0
+; RV32-NEXT: vsll.vv v8, v12, v14
; RV32-NEXT: ret
;
; RV64-LABEL: vwsll_vx_i64_v16i16:
@@ -519,12 +525,12 @@ define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmv.v.x v16, a0
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vzext.vf2 v10, v8
+; RV64-NEXT: vzext.vf2 v12, v8
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT: vnsrl.wi v12, v16, 0
+; RV64-NEXT: vnsrl.wi v8, v16, 0
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v12, 0
-; RV64-NEXT: vsll.vv v8, v10, v8
+; RV64-NEXT: vnsrl.wi v14, v8, 0
+; RV64-NEXT: vsll.vv v8, v12, v14
; RV64-NEXT: ret
;
; CHECK-ZVBB-RV32-LABEL: vwsll_vx_i64_v16i16:
@@ -534,19 +540,19 @@ define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
; CHECK-ZVBB-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-ZVBB-RV32-NEXT: vrgather.vi v24, v16, 0
; CHECK-ZVBB-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-ZVBB-RV32-NEXT: vzext.vf2 v10, v8
+; CHECK-ZVBB-RV32-NEXT: vzext.vf2 v12, v8
; CHECK-ZVBB-RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-ZVBB-RV32-NEXT: vnsrl.wi v12, v24, 0
+; CHECK-ZVBB-RV32-NEXT: vnsrl.wi v8, v24, 0
; CHECK-ZVBB-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-ZVBB-RV32-NEXT: vnsrl.wi v8, v12, 0
-; CHECK-ZVBB-RV32-NEXT: vsll.vv v8, v10, v8
+; CHECK-ZVBB-RV32-NEXT: vnsrl.wi v14, v8, 0
+; CHECK-ZVBB-RV32-NEXT: vsll.vv v8, v12, v14
; CHECK-ZVBB-RV32-NEXT: ret
;
; CHECK-ZVBB-RV64-LABEL: vwsll_vx_i64_v16i16:
; CHECK-ZVBB-RV64: # %bb.0:
; CHECK-ZVBB-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-RV64-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-RV64-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-RV64-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-RV64-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-RV64-NEXT: ret
%head = insertelement <8 x i64> poison, i64 %b, i32 0
%splat = shufflevector <8 x i64> %head, <8 x i64> poison, <16 x i32> zeroinitializer
@@ -570,8 +576,8 @@ define <16 x i16> @vwsll_vx_i32_v16i16(<16 x i8> %a, i32 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v16i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i32> poison, i32 %b, i32 0
%splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer
@@ -592,8 +598,8 @@ define <16 x i16> @vwsll_vx_i16_v16i16(<16 x i8> %a, i16 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v16i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i16> poison, i16 %b, i32 0
%splat = shufflevector <16 x i16> %head, <16 x i16> poison, <16 x i32> zeroinitializer
@@ -606,19 +612,19 @@ define <16 x i16> @vwsll_vx_i8_v16i16_sext(<16 x i8> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v16i16_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vmv.v.x v11, a0
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v10, v11
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i8> poison, i8 %b, i32 0
%splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -632,18 +638,18 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v16i16_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i8> poison, i8 %b, i32 0
%splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -656,17 +662,17 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) {
define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) {
; CHECK-LABEL: vwsll_vi_v16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_v16i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2
; CHECK-ZVBB-NEXT: ret
%x = zext <16 x i8> %a to <16 x i16>
%z = shl <16 x i16> %x, splat (i16 2)
@@ -746,21 +752,21 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_sext(<4 x i8> %a, i32 %b) {
; CHECK-LABEL: vwsll_vx_i32_v4i64_v4i8_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsext.vf2 v8, v12
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -774,11 +780,11 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_zext(<4 x i8> %a, i32 %b) {
; CHECK-LABEL: vwsll_vx_i32_v4i64_v4i8_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_zext:
@@ -799,21 +805,21 @@ define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_sext(<4 x i8> %a, i16 %b) {
; CHECK-LABEL: vwsll_vx_i16_v4i64_v4i8_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsext.vf4 v8, v12
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -827,11 +833,11 @@ define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_zext(<4 x i8> %a, i16 %b) {
; CHECK-LABEL: vwsll_vx_i16_v4i64_v4i8_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_zext:
@@ -852,21 +858,21 @@ define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_sext(<4 x i8> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v4i64_v4i8_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vsext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsext.vf8 v8, v12
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -880,11 +886,11 @@ define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_zext(<4 x i8> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_v4i64_v4i8_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vzext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_zext:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
index 382f00913cb41..eafea7292a54b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
@@ -5,12 +5,13 @@
define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
-; CHECK-NEXT: vwsub.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwsub.wv v8, v8, v16, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -22,12 +23,13 @@ define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
-; CHECK-NEXT: vwsubu.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwsubu.wv v8, v8, v16, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -39,13 +41,13 @@ define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vwsubu_vv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
-; CHECK-NEXT: vwsubu.vv v12, v10, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vvm v14, v10, v8, v0
+; CHECK-NEXT: vwsubu.vv v8, v12, v14
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
index df90dae379c06..8259336e8668c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
@@ -56,8 +56,8 @@ define <4 x i64> @vzext_v4i64_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl)
; CHECK-LABEL: vzext_v4i64_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf8 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.zext.v4i64.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -67,8 +67,8 @@ define <4 x i64> @vzext_v4i64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vzext_v4i64_v4i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf8 v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.zext.v4i64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
@@ -104,8 +104,8 @@ define <4 x i64> @vzext_v4i64_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl
; CHECK-LABEL: vzext_v4i64_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.zext.v4i64.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -115,8 +115,8 @@ define <4 x i64> @vzext_v4i64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vzext_v4i64_v4i16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.zext.v4i64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
@@ -128,8 +128,8 @@ define <4 x i64> @vzext_v4i64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl
; CHECK-LABEL: vzext_v4i64_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x i64> %v
@@ -139,8 +139,8 @@ define <4 x i64> @vzext_v4i64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vzext_v4i64_v4i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x i64> %v
@@ -153,24 +153,25 @@ define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v0, 2
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v24, v8, v0.t
+; CHECK-NEXT: vzext.vf2 v16, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 16
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vzext.vf2 v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl)
ret <32 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
index b7661bd826fed..4512d809995a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -358,15 +358,15 @@ define <vscale x 4 x i64> @trunc_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_si64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: vmv2r.v v12, v8
+; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
%b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -377,15 +377,15 @@ define <vscale x 4 x i64> @trunc_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: vmv2r.v v12, v8
+; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
%b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -801,20 +801,20 @@ define <vscale x 4 x i32> @ceil_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si64:
; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v12, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v12
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vmv4r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_si64:
; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv2r.v v12, v8
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v12, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v12
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
%b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -824,20 +824,20 @@ define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
define <vscale x 4 x i64> @ceil_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui64:
; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.xu.f.v v12, v8
+; RV32-NEXT: vfwcvt.xu.f.v v8, v12
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vmv4r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui64:
; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv2r.v v12, v8
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.xu.f.v v12, v8
+; RV64-NEXT: vfwcvt.xu.f.v v8, v12
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
%b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -1004,15 +1004,15 @@ define <vscale x 4 x i64> @rint_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_si64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v12, v8
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: rint_nxv4f32_to_si64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v12, v8
-; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: vmv2r.v v12, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
%b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -1023,15 +1023,15 @@ define <vscale x 4 x i64> @rint_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: rint_nxv4f32_to_ui64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfwcvt.xu.f.v v12, v8
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vfwcvt.xu.f.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: rint_nxv4f32_to_ui64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfwcvt.xu.f.v v12, v8
-; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: vmv2r.v v12, v8
+; RV64-NEXT: vfwcvt.xu.f.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
%b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index 21fa551ad75df..6e214578d47bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -406,12 +406,12 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vmset.m v16
; CHECK-NEXT: slli a1, a2, 1
; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v7, v24, a2
+; CHECK-NEXT: vslidedown.vx v7, v16, a2
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
@@ -1050,12 +1050,12 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v24
+; ZVFHMIN-NEXT: vmset.m v16
; ZVFHMIN-NEXT: slli a1, a2, 1
; ZVFHMIN-NEXT: srli a2, a2, 2
; ZVFHMIN-NEXT: sub a3, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2
+; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index b2269848897a2..d44c54dd965e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -406,12 +406,12 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vmset.m v16
; CHECK-NEXT: slli a1, a2, 1
; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v7, v24, a2
+; CHECK-NEXT: vslidedown.vx v7, v16, a2
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
@@ -1050,12 +1050,12 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v24
+; ZVFHMIN-NEXT: vmset.m v16
; ZVFHMIN-NEXT: slli a1, a2, 1
; ZVFHMIN-NEXT: srli a2, a2, 2
; ZVFHMIN-NEXT: sub a3, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2
+; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
index 52e5ecf9cb8a1..bc45671077106 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
@@ -284,11 +284,11 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
-; CHECK-NEXT: vfwcvt.f.f.v v9, v8
+; CHECK-NEXT: vfwcvt.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
%x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
ret <vscale x 2 x i64> %x
@@ -299,11 +299,11 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v10
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
%x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
ret <vscale x 4 x i64> %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
index 02b43c2d95295..bb5ad6ba9d88a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
@@ -284,11 +284,11 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
-; CHECK-NEXT: vfwcvt.f.f.v v9, v8
+; CHECK-NEXT: vfwcvt.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
%x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
ret <vscale x 2 x i64> %x
@@ -299,11 +299,11 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmfne.vv v0, v8, v8
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
%x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
ret <vscale x 4 x i64> %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
index 4d47c265a9747..ae0542fb5b74f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -178,8 +178,8 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_si32(<vscale x 4 x half> %x) {
; CHECK-LABEL: trunc_nxv4f16_to_si32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
%b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -190,8 +190,8 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
; CHECK-LABEL: trunc_nxv4f16_to_ui32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
%b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -505,11 +505,11 @@ define <vscale x 4 x i16> @ceil_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
define <vscale x 4 x i32> @ceil_nxv4f16_to_si32(<vscale x 4 x half> %x) {
; CHECK-LABEL: ceil_nxv4f16_to_si32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v10, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
%b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -519,11 +519,11 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_si32(<vscale x 4 x half> %x) {
define <vscale x 4 x i32> @ceil_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
; CHECK-LABEL: ceil_nxv4f16_to_ui32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v10, v8
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v10
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
%b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -886,8 +886,8 @@ define <vscale x 4 x i32> @rint_nxv4f16_to_si32(<vscale x 4 x half> %x) {
; CHECK-LABEL: rint_nxv4f16_to_si32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
%b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -898,8 +898,8 @@ define <vscale x 4 x i32> @rint_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
; CHECK-LABEL: rint_nxv4f16_to_ui32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
%b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
index 7e2ec46339b33..e7b85c5d6902b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -7,28 +7,28 @@ define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
; RV64-1024: # %bb.0: # %entry
; RV64-1024-NEXT: li a3, 128
; RV64-1024-NEXT: vsetvli zero, a3, e16, m2, ta, ma
-; RV64-1024-NEXT: vle16.v v8, (a1)
-; RV64-1024-NEXT: vle16.v v10, (a2)
+; RV64-1024-NEXT: vle16.v v12, (a1)
+; RV64-1024-NEXT: vle16.v v14, (a2)
; RV64-1024-NEXT: li a1, -1
-; RV64-1024-NEXT: vwaddu.vv v12, v8, v10
-; RV64-1024-NEXT: vwmaccu.vx v12, a1, v10
+; RV64-1024-NEXT: vwaddu.vv v8, v12, v14
+; RV64-1024-NEXT: vwmaccu.vx v8, a1, v14
; RV64-1024-NEXT: li a1, 256
; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; RV64-1024-NEXT: vse16.v v12, (a0)
+; RV64-1024-NEXT: vse16.v v8, (a0)
; RV64-1024-NEXT: ret
;
; RV64-2048-LABEL: interleave256:
; RV64-2048: # %bb.0: # %entry
; RV64-2048-NEXT: li a3, 128
; RV64-2048-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; RV64-2048-NEXT: vle16.v v8, (a1)
-; RV64-2048-NEXT: vle16.v v9, (a2)
+; RV64-2048-NEXT: vle16.v v10, (a1)
+; RV64-2048-NEXT: vle16.v v11, (a2)
; RV64-2048-NEXT: li a1, -1
-; RV64-2048-NEXT: vwaddu.vv v10, v8, v9
-; RV64-2048-NEXT: vwmaccu.vx v10, a1, v9
+; RV64-2048-NEXT: vwaddu.vv v8, v10, v11
+; RV64-2048-NEXT: vwmaccu.vx v8, a1, v11
; RV64-2048-NEXT: li a1, 256
; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; RV64-2048-NEXT: vse16.v v10, (a0)
+; RV64-2048-NEXT: vse16.v v8, (a0)
; RV64-2048-NEXT: ret
entry:
%ve = load <128 x i16>, ptr %0, align 256
@@ -45,28 +45,28 @@ define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
; RV64-1024: # %bb.0: # %entry
; RV64-1024-NEXT: li a3, 256
; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; RV64-1024-NEXT: vle16.v v8, (a1)
-; RV64-1024-NEXT: vle16.v v12, (a2)
+; RV64-1024-NEXT: vle16.v v16, (a1)
+; RV64-1024-NEXT: vle16.v v20, (a2)
; RV64-1024-NEXT: li a1, -1
-; RV64-1024-NEXT: vwaddu.vv v16, v8, v12
-; RV64-1024-NEXT: vwmaccu.vx v16, a1, v12
+; RV64-1024-NEXT: vwaddu.vv v8, v16, v20
+; RV64-1024-NEXT: vwmaccu.vx v8, a1, v20
; RV64-1024-NEXT: li a1, 512
; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-1024-NEXT: vse16.v v16, (a0)
+; RV64-1024-NEXT: vse16.v v8, (a0)
; RV64-1024-NEXT: ret
;
; RV64-2048-LABEL: interleave512:
; RV64-2048: # %bb.0: # %entry
; RV64-2048-NEXT: li a3, 256
; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, ta, ma
-; RV64-2048-NEXT: vle16.v v8, (a1)
-; RV64-2048-NEXT: vle16.v v10, (a2)
+; RV64-2048-NEXT: vle16.v v12, (a1)
+; RV64-2048-NEXT: vle16.v v14, (a2)
; RV64-2048-NEXT: li a1, -1
-; RV64-2048-NEXT: vwaddu.vv v12, v8, v10
-; RV64-2048-NEXT: vwmaccu.vx v12, a1, v10
+; RV64-2048-NEXT: vwaddu.vv v8, v12, v14
+; RV64-2048-NEXT: vwmaccu.vx v8, a1, v14
; RV64-2048-NEXT: li a1, 512
; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; RV64-2048-NEXT: vse16.v v12, (a0)
+; RV64-2048-NEXT: vse16.v v8, (a0)
; RV64-2048-NEXT: ret
entry:
%ve = load <256 x i16>, ptr %0, align 512
diff --git a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
index 7d37d91ee21b5..c5e1852923d54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
@@ -37,12 +37,12 @@ define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8>
; CHECK-NEXT: vmseq.vv v14, v8, v12
; CHECK-NEXT: vrgather.vi v12, v10, 0
; CHECK-NEXT: vmseq.vv v15, v8, v12
-; CHECK-NEXT: vmor.mm v12, v15, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 2
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 3
-; CHECK-NEXT: vmor.mm v10, v12, v13
-; CHECK-NEXT: vmseq.vv v11, v8, v14
+; CHECK-NEXT: vmor.mm v14, v15, v14
+; CHECK-NEXT: vrgather.vi v12, v10, 2
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vrgather.vi v12, v10, 3
+; CHECK-NEXT: vmor.mm v10, v14, v15
+; CHECK-NEXT: vmseq.vv v11, v8, v12
; CHECK-NEXT: vmor.mm v8, v10, v11
; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret
@@ -58,24 +58,24 @@ define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8>
; CHECK-NEXT: vmseq.vv v14, v8, v12
; CHECK-NEXT: vrgather.vi v12, v10, 0
; CHECK-NEXT: vmseq.vv v15, v8, v12
-; CHECK-NEXT: vmor.mm v12, v15, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 2
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 3
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 4
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 5
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 6
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 7
-; CHECK-NEXT: vmor.mm v10, v12, v13
-; CHECK-NEXT: vmseq.vv v11, v8, v14
+; CHECK-NEXT: vmor.mm v14, v15, v14
+; CHECK-NEXT: vrgather.vi v12, v10, 2
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 3
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 4
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 5
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 6
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vrgather.vi v12, v10, 7
+; CHECK-NEXT: vmor.mm v10, v14, v15
+; CHECK-NEXT: vmseq.vv v11, v8, v12
; CHECK-NEXT: vmor.mm v8, v10, v11
; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret
@@ -91,48 +91,48 @@ define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8
; CHECK-NEXT: vmseq.vv v14, v8, v12
; CHECK-NEXT: vrgather.vi v12, v10, 0
; CHECK-NEXT: vmseq.vv v15, v8, v12
-; CHECK-NEXT: vmor.mm v12, v15, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 2
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 3
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 4
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 5
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 6
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 7
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 8
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 9
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 10
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 11
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 12
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 13
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 14
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 15
-; CHECK-NEXT: vmor.mm v10, v12, v13
-; CHECK-NEXT: vmseq.vv v11, v8, v14
+; CHECK-NEXT: vmor.mm v14, v15, v14
+; CHECK-NEXT: vrgather.vi v12, v10, 2
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 3
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 4
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 5
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 6
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 7
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 8
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 9
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 10
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 11
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 12
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 13
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 14
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vrgather.vi v12, v10, 15
+; CHECK-NEXT: vmor.mm v10, v14, v15
+; CHECK-NEXT: vmseq.vv v11, v8, v12
; CHECK-NEXT: vmor.mm v8, v10, v11
; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret
@@ -319,24 +319,24 @@ define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16>
; CHECK-NEXT: vmseq.vv v14, v8, v12
; CHECK-NEXT: vrgather.vi v12, v10, 0
; CHECK-NEXT: vmseq.vv v15, v8, v12
-; CHECK-NEXT: vmor.mm v12, v15, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 2
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 3
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 4
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 5
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vmor.mm v12, v12, v13
-; CHECK-NEXT: vrgather.vi v14, v10, 6
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 7
-; CHECK-NEXT: vmor.mm v10, v12, v13
-; CHECK-NEXT: vmseq.vv v11, v8, v14
+; CHECK-NEXT: vmor.mm v14, v15, v14
+; CHECK-NEXT: vrgather.vi v12, v10, 2
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 3
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 4
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 5
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vmor.mm v14, v14, v15
+; CHECK-NEXT: vrgather.vi v12, v10, 6
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vrgather.vi v12, v10, 7
+; CHECK-NEXT: vmor.mm v10, v14, v15
+; CHECK-NEXT: vmseq.vv v11, v8, v12
; CHECK-NEXT: vmor.mm v8, v10, v11
; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret
@@ -966,12 +966,12 @@ define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32
; CHECK-NEXT: vmseq.vv v14, v8, v12
; CHECK-NEXT: vrgather.vi v12, v10, 0
; CHECK-NEXT: vmseq.vv v15, v8, v12
-; CHECK-NEXT: vmor.mm v12, v15, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 2
-; CHECK-NEXT: vmseq.vv v13, v8, v14
-; CHECK-NEXT: vrgather.vi v14, v10, 3
-; CHECK-NEXT: vmor.mm v10, v12, v13
-; CHECK-NEXT: vmseq.vv v11, v8, v14
+; CHECK-NEXT: vmor.mm v14, v15, v14
+; CHECK-NEXT: vrgather.vi v12, v10, 2
+; CHECK-NEXT: vmseq.vv v15, v8, v12
+; CHECK-NEXT: vrgather.vi v12, v10, 3
+; CHECK-NEXT: vmor.mm v10, v14, v15
+; CHECK-NEXT: vmseq.vv v11, v8, v12
; CHECK-NEXT: vmor.mm v8, v10, v11
; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
index 818abb9f4a009..66c64c6b275f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
@@ -20,8 +20,8 @@ define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
; CHECK-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x)
ret <vscale x 2 x i64> %a
@@ -32,8 +32,8 @@ define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12
; CHECK-NEXT: ret
%a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x)
ret <vscale x 4 x i64> %a
@@ -44,8 +44,8 @@ define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16
; CHECK-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x)
ret <vscale x 8 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
index 166dba6a56524..dbe2d03e1a909 100644
--- a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
@@ -20,8 +20,8 @@ define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x, <vsca
; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i64> %a
@@ -32,8 +32,8 @@ define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x, <vsca
; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i64> %a
@@ -44,8 +44,8 @@ define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x, <vsca
; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
index a9668dff6055a..534fdf21675ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
@@ -46,8 +46,8 @@ define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x) {
; RV64-i64-LABEL: lrint_nxv2f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8
-; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: vmv1r.v v10, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-i64-NEXT: ret
%a = call <vscale x 2 x iXLen> @llvm.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x)
ret <vscale x 2 x iXLen> %a
@@ -70,8 +70,8 @@ define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x) {
; RV64-i64-LABEL: lrint_nxv4f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8
-; RV64-i64-NEXT: vmv4r.v v8, v12
+; RV64-i64-NEXT: vmv2r.v v12, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v12
; RV64-i64-NEXT: ret
%a = call <vscale x 4 x iXLen> @llvm.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x)
ret <vscale x 4 x iXLen> %a
@@ -94,8 +94,8 @@ define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x) {
; RV64-i64-LABEL: lrint_nxv8f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8
-; RV64-i64-NEXT: vmv8r.v v8, v16
+; RV64-i64-NEXT: vmv4r.v v16, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v16
; RV64-i64-NEXT: ret
%a = call <vscale x 8 x iXLen> @llvm.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x)
ret <vscale x 8 x iXLen> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
index 21045b69a8b5d..98d32b36c23c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
@@ -46,8 +46,8 @@ define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x, <vscale x 2
; RV64-i64-LABEL: lrint_nxv2f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
-; RV64-i64-NEXT: vmv2r.v v8, v10
+; RV64-i64-NEXT: vmv1r.v v10, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v10, v0.t
; RV64-i64-NEXT: ret
%a = call <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x iXLen> %a
@@ -70,8 +70,8 @@ define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x, <vscale x 4
; RV64-i64-LABEL: lrint_nxv4f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8, v0.t
-; RV64-i64-NEXT: vmv4r.v v8, v12
+; RV64-i64-NEXT: vmv2r.v v12, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v12, v0.t
; RV64-i64-NEXT: ret
%a = call <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x iXLen> %a
@@ -94,8 +94,8 @@ define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x, <vscale x 8
; RV64-i64-LABEL: lrint_nxv8f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8, v0.t
-; RV64-i64-NEXT: vmv8r.v v8, v16
+; RV64-i64-NEXT: vmv4r.v v16, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v16, v0.t
; RV64-i64-NEXT: ret
%a = call <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x iXLen> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 72ea0be5da28d..2757e140ecde5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -140,10 +140,10 @@ define <vscale x 2 x i64> @mgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x ptr> %p
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v10, v9
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vsext.vf8 v10, v9
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vsext.vf8 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i64:
@@ -162,10 +162,10 @@ define <vscale x 2 x i64> @mgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x ptr> %p
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v10, v9
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vzext.vf8 v10, v9
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vzext.vf8 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i64:
@@ -363,10 +363,10 @@ define <vscale x 2 x i64> @mgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x ptr> %
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v10, v9
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vsext.vf4 v10, v9
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i64:
@@ -385,10 +385,10 @@ define <vscale x 2 x i64> @mgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x ptr> %
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v10, v9
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vzext.vf4 v10, v9
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vzext.vf4 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i64:
@@ -610,10 +610,10 @@ define <vscale x 2 x i64> @mgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %
; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v10, v9
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vsext.vf2 v10, v9
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vsext.vf2 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64:
@@ -632,10 +632,10 @@ define <vscale x 2 x i64> @mgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %
; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v10, v9
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vzext.vf2 v10, v9
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vzext.vf2 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64:
@@ -941,8 +941,8 @@ define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <v
; RV32-LABEL: mgather_truemask_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_truemask_nxv4i64:
@@ -1999,8 +1999,8 @@ define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs,
; RV32-LABEL: mgather_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: mgather_truemask_nxv4f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index 9298e8b520bd9..d995a31f243d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -206,14 +206,14 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v10, v8, 1, v0
; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vid.v v10
-; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
+; RV32-BITS-UNKNOWN-NEXT: vid.v v8
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10
-; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v9, 0
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v11, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_nxv8i1:
@@ -247,14 +247,14 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v10, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vid.v v10
-; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
+; RV64-BITS-UNKNOWN-NEXT: vid.v v8
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10
-; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v9, 0
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v11, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_nxv8i1:
@@ -310,38 +310,38 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV32-BITS-256: # %bb.0:
; RV32-BITS-256-NEXT: csrr a0, vlenb
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vid.v v8
+; RV32-BITS-256-NEXT: vid.v v10
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; RV32-BITS-256-NEXT: vmv.v.i v10, 0
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
; RV32-BITS-256-NEXT: addi a0, a0, -1
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vrsub.vx v8, v8, a0
+; RV32-BITS-256-NEXT: vrsub.vx v12, v10, a0
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-256-NEXT: vmerge.vim v10, v10, 1, v0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vrgather.vv v13, v10, v8
-; RV32-BITS-256-NEXT: vrgather.vv v12, v11, v8
+; RV32-BITS-256-NEXT: vrgather.vv v11, v8, v12
+; RV32-BITS-256-NEXT: vrgather.vv v10, v9, v12
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-256-NEXT: vmsne.vi v0, v12, 0
+; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_nxv16i1:
; RV32-BITS-512: # %bb.0:
; RV32-BITS-512-NEXT: csrr a0, vlenb
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vid.v v8
+; RV32-BITS-512-NEXT: vid.v v10
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; RV32-BITS-512-NEXT: vmv.v.i v10, 0
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
; RV32-BITS-512-NEXT: addi a0, a0, -1
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0
+; RV32-BITS-512-NEXT: vrsub.vx v12, v10, a0
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-512-NEXT: vmerge.vim v10, v10, 1, v0
+; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vrgather.vv v13, v10, v8
-; RV32-BITS-512-NEXT: vrgather.vv v12, v11, v8
+; RV32-BITS-512-NEXT: vrgather.vv v11, v8, v12
+; RV32-BITS-512-NEXT: vrgather.vv v10, v9, v12
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-512-NEXT: vmsne.vi v0, v12, 0
+; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i1:
@@ -367,38 +367,38 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vid.v v8
+; RV64-BITS-256-NEXT: vid.v v10
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; RV64-BITS-256-NEXT: vmv.v.i v10, 0
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vrsub.vx v8, v8, a0
+; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-256-NEXT: vmerge.vim v10, v10, 1, v0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vrgather.vv v13, v10, v8
-; RV64-BITS-256-NEXT: vrgather.vv v12, v11, v8
+; RV64-BITS-256-NEXT: vrgather.vv v11, v8, v12
+; RV64-BITS-256-NEXT: vrgather.vv v10, v9, v12
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-256-NEXT: vmsne.vi v0, v12, 0
+; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_nxv16i1:
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vid.v v8
+; RV64-BITS-512-NEXT: vid.v v10
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; RV64-BITS-512-NEXT: vmv.v.i v10, 0
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-512-NEXT: vmerge.vim v10, v10, 1, v0
+; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vrgather.vv v13, v10, v8
-; RV64-BITS-512-NEXT: vrgather.vv v12, v11, v8
+; RV64-BITS-512-NEXT: vrgather.vv v11, v8, v12
+; RV64-BITS-512-NEXT: vrgather.vv v10, v9, v12
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-512-NEXT: vmsne.vi v0, v12, 0
+; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
ret <vscale x 16 x i1> %res
@@ -409,14 +409,14 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV32-BITS-UNKNOWN: # %bb.0:
; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vid.v v8
+; RV32-BITS-UNKNOWN-NEXT: vid.v v12
; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v12, 0
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v12, a0
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v12, v12, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v12, v8, 1, v0
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v12, v16
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v13, v16
@@ -430,14 +430,14 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV32-BITS-256: # %bb.0:
; RV32-BITS-256-NEXT: csrr a0, vlenb
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vid.v v8
+; RV32-BITS-256-NEXT: vid.v v12
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; RV32-BITS-256-NEXT: vmv.v.i v12, 0
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
; RV32-BITS-256-NEXT: addi a0, a0, -1
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vrsub.vx v16, v8, a0
+; RV32-BITS-256-NEXT: vrsub.vx v16, v12, a0
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV32-BITS-256-NEXT: vmerge.vim v12, v12, 1, v0
+; RV32-BITS-256-NEXT: vmerge.vim v12, v8, 1, v0
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-BITS-256-NEXT: vrgather.vv v11, v12, v16
; RV32-BITS-256-NEXT: vrgather.vv v10, v13, v16
@@ -451,14 +451,14 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV32-BITS-512: # %bb.0:
; RV32-BITS-512-NEXT: csrr a0, vlenb
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vid.v v8
+; RV32-BITS-512-NEXT: vid.v v12
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; RV32-BITS-512-NEXT: vmv.v.i v12, 0
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
; RV32-BITS-512-NEXT: addi a0, a0, -1
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vrsub.vx v16, v8, a0
+; RV32-BITS-512-NEXT: vrsub.vx v16, v12, a0
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV32-BITS-512-NEXT: vmerge.vim v12, v12, 1, v0
+; RV32-BITS-512-NEXT: vmerge.vim v12, v8, 1, v0
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-BITS-512-NEXT: vrgather.vv v11, v12, v16
; RV32-BITS-512-NEXT: vrgather.vv v10, v13, v16
@@ -472,14 +472,14 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vid.v v8
+; RV64-BITS-UNKNOWN-NEXT: vid.v v12
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v12, 0
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v12, a0
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v12, v12, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v12, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v12, v16
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v13, v16
@@ -493,14 +493,14 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vid.v v8
+; RV64-BITS-256-NEXT: vid.v v12
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; RV64-BITS-256-NEXT: vmv.v.i v12, 0
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vrsub.vx v16, v8, a0
+; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV64-BITS-256-NEXT: vmerge.vim v12, v12, 1, v0
+; RV64-BITS-256-NEXT: vmerge.vim v12, v8, 1, v0
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-BITS-256-NEXT: vrgather.vv v11, v12, v16
; RV64-BITS-256-NEXT: vrgather.vv v10, v13, v16
@@ -514,14 +514,14 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vid.v v8
+; RV64-BITS-512-NEXT: vid.v v12
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; RV64-BITS-512-NEXT: vmv.v.i v12, 0
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vrsub.vx v16, v8, a0
+; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV64-BITS-512-NEXT: vmerge.vim v12, v12, 1, v0
+; RV64-BITS-512-NEXT: vmerge.vim v12, v8, 1, v0
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-BITS-512-NEXT: vrgather.vv v11, v12, v16
; RV64-BITS-512-NEXT: vrgather.vv v10, v13, v16
@@ -539,23 +539,23 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-UNKNOWN: # %bb.0:
; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vid.v v8
+; RV32-BITS-UNKNOWN-NEXT: vid.v v16
; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v24, 0
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v24, v24, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v8, 1, v0
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v15, v24, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v14, v25, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v26, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v27, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v28, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v29, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v30, v16
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v31, v16
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v15, v16, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v14, v17, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v18, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v19, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v20, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v21, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v22, v24
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v23, v24
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-UNKNOWN-NEXT: ret
@@ -564,14 +564,14 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-256: # %bb.0:
; RV32-BITS-256-NEXT: csrr a0, vlenb
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vid.v v8
+; RV32-BITS-256-NEXT: vid.v v16
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; RV32-BITS-256-NEXT: vmv.v.i v16, 0
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
; RV32-BITS-256-NEXT: addi a0, a0, -1
; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-256-NEXT: vrsub.vx v24, v8, a0
+; RV32-BITS-256-NEXT: vrsub.vx v24, v16, a0
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV32-BITS-256-NEXT: vmerge.vim v16, v16, 1, v0
+; RV32-BITS-256-NEXT: vmerge.vim v16, v8, 1, v0
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-BITS-256-NEXT: vrgather.vv v15, v16, v24
; RV32-BITS-256-NEXT: vrgather.vv v14, v17, v24
@@ -589,14 +589,14 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-512: # %bb.0:
; RV32-BITS-512-NEXT: csrr a0, vlenb
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vid.v v8
+; RV32-BITS-512-NEXT: vid.v v16
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; RV32-BITS-512-NEXT: vmv.v.i v16, 0
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
; RV32-BITS-512-NEXT: addi a0, a0, -1
; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV32-BITS-512-NEXT: vrsub.vx v24, v8, a0
+; RV32-BITS-512-NEXT: vrsub.vx v24, v16, a0
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0
+; RV32-BITS-512-NEXT: vmerge.vim v16, v8, 1, v0
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-BITS-512-NEXT: vrgather.vv v15, v16, v24
; RV32-BITS-512-NEXT: vrgather.vv v14, v17, v24
@@ -614,23 +614,23 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vid.v v8
+; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v24, 0
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v24, v24, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v15, v24, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v14, v25, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v26, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v27, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v28, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v29, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v30, v16
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v31, v16
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v15, v16, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v14, v17, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v18, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v19, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v20, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v21, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v22, v24
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v23, v24
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-UNKNOWN-NEXT: ret
@@ -639,14 +639,14 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vid.v v8
+; RV64-BITS-256-NEXT: vid.v v16
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; RV64-BITS-256-NEXT: vmv.v.i v16, 0
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-256-NEXT: vrsub.vx v24, v8, a0
+; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV64-BITS-256-NEXT: vmerge.vim v16, v16, 1, v0
+; RV64-BITS-256-NEXT: vmerge.vim v16, v8, 1, v0
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-BITS-256-NEXT: vrgather.vv v15, v16, v24
; RV64-BITS-256-NEXT: vrgather.vv v14, v17, v24
@@ -664,14 +664,14 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vid.v v8
+; RV64-BITS-512-NEXT: vid.v v16
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; RV64-BITS-512-NEXT: vmv.v.i v16, 0
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; RV64-BITS-512-NEXT: vrsub.vx v24, v8, a0
+; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0
+; RV64-BITS-512-NEXT: vmerge.vim v16, v8, 1, v0
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-BITS-512-NEXT: vrgather.vv v15, v16, v24
; RV64-BITS-512-NEXT: vrgather.vv v14, v17, v24
@@ -1969,18 +1969,18 @@ define <vscale x 6 x i64> @reverse_nxv6i64(<vscale x 6 x i64> %a) {
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vrsub.vx v22, v16, a0
-; CHECK-NEXT: vrgather.vv v21, v10, v22
-; CHECK-NEXT: vrgather.vv v19, v12, v22
-; CHECK-NEXT: vrgather.vv v18, v13, v22
-; CHECK-NEXT: vrgather.vv v20, v11, v22
+; CHECK-NEXT: vrsub.vx v24, v16, a0
+; CHECK-NEXT: vrgather.vv v21, v10, v24
+; CHECK-NEXT: vrgather.vv v19, v12, v24
+; CHECK-NEXT: vrgather.vv v18, v13, v24
+; CHECK-NEXT: vrgather.vv v20, v11, v24
; CHECK-NEXT: vmv2r.v v16, v18
; CHECK-NEXT: vmv2r.v v18, v20
-; CHECK-NEXT: vrgather.vv v31, v8, v22
-; CHECK-NEXT: vrgather.vv v30, v9, v22
-; CHECK-NEXT: vrgather.vv v9, v14, v22
-; CHECK-NEXT: vrgather.vv v8, v15, v22
-; CHECK-NEXT: vmv2r.v v20, v30
+; CHECK-NEXT: vrgather.vv v23, v8, v24
+; CHECK-NEXT: vrgather.vv v22, v9, v24
+; CHECK-NEXT: vrgather.vv v9, v14, v24
+; CHECK-NEXT: vrgather.vv v8, v15, v24
+; CHECK-NEXT: vmv2r.v v20, v22
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%res = call <vscale x 6 x i64> @llvm.vector.reverse.nxv6i64(<vscale x 6 x i64> %a)
@@ -2007,23 +2007,23 @@ define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) {
; RV32-NEXT: vid.v v20
; RV32-NEXT: srli a1, a0, 3
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: vrsub.vx v20, v20, a1
-; RV32-NEXT: vrgather.vv v31, v12, v20
-; RV32-NEXT: vrgather.vv v7, v8, v20
-; RV32-NEXT: vrgather.vv v30, v13, v20
-; RV32-NEXT: vrgather.vv v6, v9, v20
-; RV32-NEXT: vrgather.vv v29, v14, v20
-; RV32-NEXT: vrgather.vv v5, v10, v20
-; RV32-NEXT: vrgather.vv v28, v15, v20
-; RV32-NEXT: vrgather.vv v4, v11, v20
+; RV32-NEXT: vrsub.vx v7, v20, a1
+; RV32-NEXT: vrgather.vv v31, v12, v7
+; RV32-NEXT: vrgather.vv v23, v8, v7
+; RV32-NEXT: vrgather.vv v30, v13, v7
+; RV32-NEXT: vrgather.vv v22, v9, v7
+; RV32-NEXT: vrgather.vv v29, v14, v7
+; RV32-NEXT: vrgather.vv v21, v10, v7
+; RV32-NEXT: vrgather.vv v28, v15, v7
+; RV32-NEXT: vrgather.vv v20, v11, v7
; RV32-NEXT: addi a1, sp, 64
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: vrgather.vv v27, v16, v20
-; RV32-NEXT: vs4r.v v4, (a0)
-; RV32-NEXT: vrgather.vv v26, v17, v20
-; RV32-NEXT: vrgather.vv v25, v18, v20
-; RV32-NEXT: vrgather.vv v24, v19, v20
+; RV32-NEXT: vrgather.vv v27, v16, v7
+; RV32-NEXT: vs4r.v v20, (a0)
+; RV32-NEXT: vrgather.vv v26, v17, v7
+; RV32-NEXT: vrgather.vv v25, v18, v7
+; RV32-NEXT: vrgather.vv v24, v19, v7
; RV32-NEXT: vs8r.v v24, (a1)
; RV32-NEXT: vl8re64.v v16, (a0)
; RV32-NEXT: vl8re64.v v8, (a1)
@@ -2056,23 +2056,23 @@ define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) {
; RV64-NEXT: vid.v v20
; RV64-NEXT: srli a1, a0, 3
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: vrsub.vx v20, v20, a1
-; RV64-NEXT: vrgather.vv v31, v12, v20
-; RV64-NEXT: vrgather.vv v7, v8, v20
-; RV64-NEXT: vrgather.vv v30, v13, v20
-; RV64-NEXT: vrgather.vv v6, v9, v20
-; RV64-NEXT: vrgather.vv v29, v14, v20
-; RV64-NEXT: vrgather.vv v5, v10, v20
-; RV64-NEXT: vrgather.vv v28, v15, v20
-; RV64-NEXT: vrgather.vv v4, v11, v20
+; RV64-NEXT: vrsub.vx v7, v20, a1
+; RV64-NEXT: vrgather.vv v31, v12, v7
+; RV64-NEXT: vrgather.vv v23, v8, v7
+; RV64-NEXT: vrgather.vv v30, v13, v7
+; RV64-NEXT: vrgather.vv v22, v9, v7
+; RV64-NEXT: vrgather.vv v29, v14, v7
+; RV64-NEXT: vrgather.vv v21, v10, v7
+; RV64-NEXT: vrgather.vv v28, v15, v7
+; RV64-NEXT: vrgather.vv v20, v11, v7
; RV64-NEXT: addi a1, sp, 64
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: vrgather.vv v27, v16, v20
-; RV64-NEXT: vs4r.v v4, (a0)
-; RV64-NEXT: vrgather.vv v26, v17, v20
-; RV64-NEXT: vrgather.vv v25, v18, v20
-; RV64-NEXT: vrgather.vv v24, v19, v20
+; RV64-NEXT: vrgather.vv v27, v16, v7
+; RV64-NEXT: vs4r.v v20, (a0)
+; RV64-NEXT: vrgather.vv v26, v17, v7
+; RV64-NEXT: vrgather.vv v25, v18, v7
+; RV64-NEXT: vrgather.vv v24, v19, v7
; RV64-NEXT: vs8r.v v24, (a1)
; RV64-NEXT: vl8re64.v v16, (a0)
; RV64-NEXT: vl8re64.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
index 53f8a2503354c..557e809f4f9a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
@@ -121,10 +121,10 @@ define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 127
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vand.vx v12, v8, a2
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf8 v12, v8
-; CHECK-NEXT: vsll.vi v12, v12, 4
+; CHECK-NEXT: vzext.vf8 v8, v12
+; CHECK-NEXT: vsll.vi v12, v8, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index 6b08c4409fb63..948df1948ccef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -4,23 +4,23 @@
define <vscale x 4 x i8> @foo(ptr %p) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl1re16.v v8, (a0)
+; CHECK-NEXT: vl1re16.v v10, (a0)
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: li a0, 248
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vsll.vi v8, v8, 3
+; CHECK-NEXT: vsll.vi v12, v10, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vand.vx v8, v12, a0
+; CHECK-NEXT: vzext.vf2 v10, v12
+; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: addi a0, a0, -361
-; CHECK-NEXT: vmacc.vx v10, a0, v8
+; CHECK-NEXT: vmacc.vx v8, a0, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 15
+; CHECK-NEXT: vnsrl.wi v10, v8, 15
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: ret
%i13 = load <vscale x 4 x i16>, ptr %p, align 2
%i14 = zext <vscale x 4 x i16> %i13 to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
index 06a357eeaeb61..ab9849631663c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
@@ -143,13 +143,13 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v16, v8, 1
+; CHECK-NEXT: vslideup.vi v8, v16, 1
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v16, 0
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmsne.vi v16, v8, 0
+; CHECK-NEXT: vmv.x.s a0, v16
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: sb a0, 0(zero)
; CHECK-NEXT: li a0, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 64d4c88cbb365..722108f5282fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1370,7 +1370,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
@@ -1378,9 +1378,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv8r.v v0, v16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
@@ -1404,7 +1405,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: csrr t0, vlenb
; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: mv t1, t0
-; CHECK-NEXT: slli t0, t0, 3
+; CHECK-NEXT: slli t0, t0, 2
; CHECK-NEXT: add t0, t0, t1
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: addi t0, t0, 16
@@ -1415,7 +1416,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add t0, t0, a0
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add a0, a0, t0
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
@@ -1449,15 +1450,15 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a6, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v6, v24, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v6, v24, v16, v0.t
; CHECK-NEXT: add a0, a3, a3
; CHECK-NEXT: bltu a2, a5, .LBB85_4
; CHECK-NEXT: # %bb.3:
@@ -1467,7 +1468,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: slli a6, a6, 1
; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 3
+; CHECK-NEXT: slli a6, a6, 2
; CHECK-NEXT: add a6, a6, a7
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
@@ -1478,37 +1479,28 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a6, a6, -1
; CHECK-NEXT: and a5, a6, a5
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
+; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: add a7, a7, a6
+; CHECK-NEXT: slli a6, a6, 3
; CHECK-NEXT: add a6, a6, a7
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
; CHECK-NEXT: csrr a5, vlenb
; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: slli a5, a5, 3
+; CHECK-NEXT: slli a5, a5, 2
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v4, v24, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v4, v16, v24, v0.t
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v6, v5, a3
; CHECK-NEXT: bltu a2, a4, .LBB85_6
@@ -1516,20 +1508,29 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: mv a2, a4
; CHECK-NEXT: .LBB85_6:
; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: mv a4, a2
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: add a4, a4, a2
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v8, v4, a3
; CHECK-NEXT: add a0, a1, a1
@@ -3574,7 +3575,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: mv a3, a1
-; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: slli a1, a1, 2
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
@@ -3582,9 +3583,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: mv a3, a1
-; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
@@ -3608,7 +3610,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: csrr t0, vlenb
; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: mv t1, t0
-; ZVFHMIN-NEXT: slli t0, t0, 3
+; ZVFHMIN-NEXT: slli t0, t0, 2
; ZVFHMIN-NEXT: add t0, t0, t1
; ZVFHMIN-NEXT: add t0, sp, t0
; ZVFHMIN-NEXT: addi t0, t0, 16
@@ -3619,7 +3621,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 1
; ZVFHMIN-NEXT: add t0, t0, a0
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
@@ -3653,15 +3655,15 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a6, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v8, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v16, v0.t
; ZVFHMIN-NEXT: add a0, a3, a3
; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4
; ZVFHMIN-NEXT: # %bb.3:
@@ -3671,7 +3673,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: csrr a6, vlenb
; ZVFHMIN-NEXT: slli a6, a6, 1
; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 3
+; ZVFHMIN-NEXT: slli a6, a6, 2
; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
@@ -3682,37 +3684,28 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a6, a6, -1
; ZVFHMIN-NEXT: and a5, a6, a5
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
+; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: add a7, a7, a6
+; ZVFHMIN-NEXT: slli a6, a6, 3
; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: csrr a5, vlenb
-; ZVFHMIN-NEXT: slli a5, a5, 1
-; ZVFHMIN-NEXT: add a5, sp, a5
-; ZVFHMIN-NEXT: addi a5, a5, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a5, vlenb
; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 1
; ZVFHMIN-NEXT: add a6, a6, a5
-; ZVFHMIN-NEXT: slli a5, a5, 3
+; ZVFHMIN-NEXT: slli a5, a5, 2
; ZVFHMIN-NEXT: add a5, a5, a6
; ZVFHMIN-NEXT: add a5, sp, a5
; ZVFHMIN-NEXT: addi a5, a5, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT: csrr a5, vlenb
-; ZVFHMIN-NEXT: slli a5, a5, 1
-; ZVFHMIN-NEXT: add a5, sp, a5
-; ZVFHMIN-NEXT: addi a5, a5, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v4, v24, v8, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v6, v5, a3
; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6
@@ -3720,20 +3713,29 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: mv a2, a4
; ZVFHMIN-NEXT: .LBB171_6:
; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a5, a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a4, a4, a5
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: mv a4, a2
; ZVFHMIN-NEXT: slli a2, a2, 1
; ZVFHMIN-NEXT: add a4, a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: slli a2, a2, 2
; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v8, v4, a3
; ZVFHMIN-NEXT: add a0, a1, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index c46334fe556eb..9b794538c404e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -5750,11 +5750,11 @@ define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f)
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB125_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl1re32.v v8, (a0)
+; CHECK-NEXT: vl1re32.v v10, (a0)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: addi a2, a2, -4
-; CHECK-NEXT: vfwadd.vf v10, v8, fa0
-; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
+; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: j .LBB125_1
entry:
@@ -5823,11 +5823,11 @@ define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f)
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB127_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl1re32.v v8, (a0)
+; CHECK-NEXT: vl1re32.v v10, (a0)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: addi a2, a2, -4
-; CHECK-NEXT: vfwmul.vf v10, v8, fa0
-; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
+; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: j .LBB127_1
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
index 16c4ade7fa9cb..ed6b7f1e6efb8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
@@ -33,11 +33,11 @@ define dso_local void @test_store1(ptr nocapture noundef writeonly %dst, ptr noc
; RV32-NEXT: sltu a7, t1, a7
; RV32-NEXT: xor t0, t1, a5
; RV32-NEXT: add a6, a6, a7
-; RV32-NEXT: vmslt.vx v10, v8, a2
-; RV32-NEXT: vcompress.vm v12, v8, v10
-; RV32-NEXT: vcpop.m a7, v10
+; RV32-NEXT: vmslt.vx v12, v8, a2
+; RV32-NEXT: vcompress.vm v10, v8, v12
+; RV32-NEXT: vcpop.m a7, v12
; RV32-NEXT: vsetvli zero, a7, e32, m2, ta, ma
-; RV32-NEXT: vse32.v v12, (a0)
+; RV32-NEXT: vse32.v v10, (a0)
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: or t0, t0, a6
; RV32-NEXT: add a0, a0, a7
@@ -95,11 +95,11 @@ define dso_local void @test_store1(ptr nocapture noundef writeonly %dst, ptr noc
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a6)
; RV64-NEXT: addi a6, a6, 32
-; RV64-NEXT: vmslt.vx v10, v8, a2
-; RV64-NEXT: vcompress.vm v12, v8, v10
-; RV64-NEXT: vcpop.m a7, v10
+; RV64-NEXT: vmslt.vx v12, v8, a2
+; RV64-NEXT: vcompress.vm v10, v8, v12
+; RV64-NEXT: vcpop.m a7, v12
; RV64-NEXT: vsetvli zero, a7, e32, m2, ta, ma
-; RV64-NEXT: vse32.v v12, (a0)
+; RV64-NEXT: vse32.v v10, (a0)
; RV64-NEXT: slli a7, a7, 2
; RV64-NEXT: add a0, a0, a7
; RV64-NEXT: bne a6, a5, .LBB0_4
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index d2e743b17f38a..20e68b13bae10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -161,15 +161,15 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vslidedown.vi v12, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vmv1r.v v9, v12
+; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlseg3e32.v v8, (a0)
@@ -196,22 +196,22 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 6
-; CHECK-NEXT: vslidedown.vi v11, v8, 4
-; CHECK-NEXT: vslidedown.vi v12, v8, 2
+; CHECK-NEXT: vslidedown.vi v12, v8, 6
+; CHECK-NEXT: vslidedown.vi v13, v8, 4
+; CHECK-NEXT: vslidedown.vi v14, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v14, v8, 8
+; CHECK-NEXT: vslidedown.vi v10, v8, 8
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v10, a1
-; CHECK-NEXT: vslideup.vx v8, v12, a1
+; CHECK-NEXT: vslideup.vx v13, v12, a1
+; CHECK-NEXT: vslideup.vx v8, v14, a1
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v11, a0
+; CHECK-NEXT: vslideup.vx v8, v13, a0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vmv1r.v v9, v14
+; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vlseg5e16.v v8, (a0)
@@ -538,15 +538,15 @@ define {<2 x float>, <2 x float>, <2 x float>} @vector_deinterleave3_v632_v2f32(
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vslidedown.vi v12, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vmv1r.v v9, v12
+; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlseg3e32.v v8, (a0)
@@ -573,22 +573,22 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 6
-; CHECK-NEXT: vslidedown.vi v11, v8, 4
-; CHECK-NEXT: vslidedown.vi v12, v8, 2
+; CHECK-NEXT: vslidedown.vi v12, v8, 6
+; CHECK-NEXT: vslidedown.vi v13, v8, 4
+; CHECK-NEXT: vslidedown.vi v14, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v14, v8, 8
+; CHECK-NEXT: vslidedown.vi v10, v8, 8
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v10, a1
-; CHECK-NEXT: vslideup.vx v8, v12, a1
+; CHECK-NEXT: vslideup.vx v13, v12, a1
+; CHECK-NEXT: vslideup.vx v8, v14, a1
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v11, a0
+; CHECK-NEXT: vslideup.vx v8, v13, a0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vmv1r.v v9, v14
+; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vlseg5e16.v v8, (a0)
@@ -615,28 +615,28 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>,
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 3
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: vslidedown.vi v11, v8, 1
-; CHECK-NEXT: vmv1r.v v12, v8
-; CHECK-NEXT: vslidedown.vi v14, v8, 5
-; CHECK-NEXT: vslidedown.vi v15, v8, 6
+; CHECK-NEXT: vslidedown.vi v11, v8, 2
+; CHECK-NEXT: vslidedown.vi v12, v8, 1
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vslidedown.vi v13, v8, 5
+; CHECK-NEXT: vslidedown.vi v14, v8, 6
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: add a3, a0, a0
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vslideup.vx v12, v11, a1
+; CHECK-NEXT: vslideup.vx v11, v9, a1
+; CHECK-NEXT: vslideup.vx v10, v12, a1
; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v12, v10, a0
+; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v13, v8, 4
+; CHECK-NEXT: vslidedown.vi v11, v8, 4
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v13, v14, a1
+; CHECK-NEXT: vslideup.vx v11, v13, a1
; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v13, v15, a0
+; CHECK-NEXT: vslideup.vx v11, v14, a0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs2r.v v12, (a0)
+; CHECK-NEXT: vs2r.v v10, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 14f306da21dba..745c2cd72a6f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -16,14 +16,14 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_load_nxv16i
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v10, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
-; CHECK-NEXT: vnsrl.wi v10, v12, 8
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: vmsne.vi v8, v10, 0
+; CHECK-NEXT: vmerge.vim v8, v12, 1, v0
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnsrl.wi v14, v8, 8
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vmsne.vi v8, v14, 0
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i1>, ptr %p
%deinterleaved.results = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index ff55c09a0707b..319dfb037c15a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -10,18 +10,18 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v14, v8, 1, v0
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
-; CHECK-NEXT: vnsrl.wi v10, v12, 8
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: vmsne.vi v8, v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnsrl.wi v14, v8, 8
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vmsne.vi v8, v14, 0
; CHECK-NEXT: ret
%retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
@@ -74,12 +74,12 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: li a0, 170
-; CHECK-NEXT: vmv.v.x v17, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vcompress.vm v12, v8, v16
-; CHECK-NEXT: vcompress.vm v20, v8, v17
+; CHECK-NEXT: vcompress.vm v16, v8, v20
; CHECK-NEXT: vmv2r.v v8, v12
-; CHECK-NEXT: vmv2r.v v10, v20
+; CHECK-NEXT: vmv2r.v v10, v16
; CHECK-NEXT: ret
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
@@ -92,12 +92,12 @@ define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.x v24, a0
; CHECK-NEXT: li a0, 170
-; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmv.v.x v7, a0
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vcompress.vm v16, v8, v24
-; CHECK-NEXT: vcompress.vm v0, v8, v25
+; CHECK-NEXT: vcompress.vm v24, v8, v7
; CHECK-NEXT: vmv4r.v v8, v16
-; CHECK-NEXT: vmv4r.v v12, v0
+; CHECK-NEXT: vmv4r.v v12, v24
; CHECK-NEXT: ret
%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
@@ -336,12 +336,12 @@ define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: li a0, 170
-; CHECK-NEXT: vmv.v.x v17, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vcompress.vm v12, v8, v16
-; CHECK-NEXT: vcompress.vm v20, v8, v17
+; CHECK-NEXT: vcompress.vm v16, v8, v20
; CHECK-NEXT: vmv2r.v v8, v12
-; CHECK-NEXT: vmv2r.v v10, v20
+; CHECK-NEXT: vmv2r.v v10, v16
; CHECK-NEXT: ret
%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
@@ -574,9 +574,9 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a2, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
@@ -584,46 +584,46 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v26, v13
+; CHECK-NEXT: vmerge.vim v14, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v10, v15
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v27, v8
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v11, v12
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vmv1r.v v24, v21
+; CHECK-NEXT: vmv1r.v v8, v21
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vmv1r.v v25, v12
+; CHECK-NEXT: vmv1r.v v9, v14
; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vmv1r.v v28, v9
-; CHECK-NEXT: vs8r.v v24, (a1)
+; CHECK-NEXT: vmv1r.v v12, v13
+; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v8, (a0)
-; CHECK-NEXT: vlseg5e8.v v14, (a1)
-; CHECK-NEXT: vmv2r.v v20, v8
-; CHECK-NEXT: vmv2r.v v22, v10
-; CHECK-NEXT: vmv1r.v v21, v14
+; CHECK-NEXT: vlseg5e8.v v12, (a0)
+; CHECK-NEXT: vlseg5e8.v v18, (a1)
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv2r.v v10, v14
+; CHECK-NEXT: vmv1r.v v9, v18
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmsne.vi v0, v20, 0
-; CHECK-NEXT: vmv1r.v v14, v9
-; CHECK-NEXT: vmsne.vi v8, v14, 0
-; CHECK-NEXT: vmv1r.v v23, v16
-; CHECK-NEXT: vmsne.vi v9, v22, 0
-; CHECK-NEXT: vmv1r.v v16, v11
-; CHECK-NEXT: vmsne.vi v10, v16, 0
-; CHECK-NEXT: vmv1r.v v13, v18
-; CHECK-NEXT: vmsne.vi v11, v12, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vmv1r.v v18, v13
+; CHECK-NEXT: vmsne.vi v8, v18, 0
+; CHECK-NEXT: vmv1r.v v11, v20
+; CHECK-NEXT: vmsne.vi v9, v10, 0
+; CHECK-NEXT: vmv1r.v v20, v15
+; CHECK-NEXT: vmsne.vi v10, v20, 0
+; CHECK-NEXT: vmv1r.v v17, v22
+; CHECK-NEXT: vmsne.vi v11, v16, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
@@ -806,9 +806,9 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a2, a0, 1
; CHECK-NEXT: srli a3, a0, 3
@@ -816,40 +816,40 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: vslidedown.vx v0, v0, a1
; CHECK-NEXT: slli a3, a3, 1
; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a2
; CHECK-NEXT: sub a0, a0, a3
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v22, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v14, v12, 1, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a1
-; CHECK-NEXT: vmv1r.v v26, v13
+; CHECK-NEXT: vmv1r.v v10, v15
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v27, v14
+; CHECK-NEXT: vmerge.vim v24, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v11, v24
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a2
-; CHECK-NEXT: vmv1r.v v24, v23
+; CHECK-NEXT: vmv1r.v v8, v23
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vmv1r.v v25, v12
+; CHECK-NEXT: vmv1r.v v9, v14
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v28, v15
+; CHECK-NEXT: vmerge.vim v14, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v12, v25
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vmv1r.v v29, v8
+; CHECK-NEXT: vmv1r.v v13, v14
; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vmv1r.v v30, v9
-; CHECK-NEXT: vs8r.v v24, (a1)
+; CHECK-NEXT: vmv1r.v v14, v15
+; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: vlseg7e8.v v16, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 1e4cb06480163..e2f81acc9cf99 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -49,18 +49,20 @@ define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vector_interleave_v16i16_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v16i16_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT: vwsll.vi v10, v9, 16
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vwsll.vi v8, v10, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
ret <16 x i16> %res
@@ -70,19 +72,21 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vector_interleave_v8i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v8i32_v4i32:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; ZVBB-NEXT: vwsll.vx v10, v9, a0
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: li a0, 32
+; ZVBB-NEXT: vwsll.vx v8, v10, a0
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
ret <8 x i32> %res
@@ -205,18 +209,18 @@ define <10 x i16> @vector_interleave5_v10i16_v2i16(<2 x i16> %a, <2 x i16> %b, <
; CHECK-NEXT: vsseg5e16.v v8, (a0)
; CHECK-NEXT: add a4, a3, a1
; CHECK-NEXT: vle16.v v9, (a2)
-; CHECK-NEXT: vle16.v v10, (a4)
-; CHECK-NEXT: vle16.v v11, (a3)
+; CHECK-NEXT: vle16.v v11, (a4)
+; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: add a1, a4, a1
-; CHECK-NEXT: vle16.v v12, (a1)
+; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v11, v10, 2
+; CHECK-NEXT: vslideup.vi v12, v11, 2
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 4
+; CHECK-NEXT: vslideup.vi v8, v12, 4
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 8
+; CHECK-NEXT: vslideup.vi v8, v10, 8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
@@ -242,18 +246,18 @@ define <10 x i16> @vector_interleave5_v10i16_v2i16(<2 x i16> %a, <2 x i16> %b, <
; ZVBB-NEXT: vsseg5e16.v v8, (a0)
; ZVBB-NEXT: add a4, a3, a1
; ZVBB-NEXT: vle16.v v9, (a2)
-; ZVBB-NEXT: vle16.v v10, (a4)
-; ZVBB-NEXT: vle16.v v11, (a3)
+; ZVBB-NEXT: vle16.v v11, (a4)
+; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: add a1, a4, a1
-; ZVBB-NEXT: vle16.v v12, (a1)
+; ZVBB-NEXT: vle16.v v10, (a1)
; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vi v11, v10, 2
+; ZVBB-NEXT: vslideup.vi v12, v11, 2
; ZVBB-NEXT: vslideup.vi v8, v9, 2
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT: vslideup.vi v8, v11, 4
+; ZVBB-NEXT: vslideup.vi v8, v12, 4
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVBB-NEXT: vslideup.vi v8, v12, 8
+; ZVBB-NEXT: vslideup.vi v8, v10, 8
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
; ZVBB-NEXT: add sp, sp, a0
@@ -424,18 +428,20 @@ define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b)
; CHECK-LABEL: vector_interleave_v16f16_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v16f16_v8f16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT: vwsll.vi v10, v9, 16
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vwsll.vi v8, v10, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
ret <16 x half> %res
@@ -445,19 +451,21 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
; CHECK-LABEL: vector_interleave_v8f32_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v8f32_v4f32:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; ZVBB-NEXT: vwsll.vx v10, v9, a0
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: li a0, 32
+; ZVBB-NEXT: vwsll.vx v8, v10, a0
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
ret <8 x float> %res
@@ -580,18 +588,18 @@ define <10 x half> @vector_interleave5_v10f16_v2f16(<2 x half> %a, <2 x half> %b
; CHECK-NEXT: vsseg5e16.v v8, (a0)
; CHECK-NEXT: add a4, a3, a1
; CHECK-NEXT: vle16.v v9, (a2)
-; CHECK-NEXT: vle16.v v10, (a4)
-; CHECK-NEXT: vle16.v v11, (a3)
+; CHECK-NEXT: vle16.v v11, (a4)
+; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: add a1, a4, a1
-; CHECK-NEXT: vle16.v v12, (a1)
+; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v11, v10, 2
+; CHECK-NEXT: vslideup.vi v12, v11, 2
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v11, 4
+; CHECK-NEXT: vslideup.vi v8, v12, 4
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 8
+; CHECK-NEXT: vslideup.vi v8, v10, 8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
@@ -617,18 +625,18 @@ define <10 x half> @vector_interleave5_v10f16_v2f16(<2 x half> %a, <2 x half> %b
; ZVBB-NEXT: vsseg5e16.v v8, (a0)
; ZVBB-NEXT: add a4, a3, a1
; ZVBB-NEXT: vle16.v v9, (a2)
-; ZVBB-NEXT: vle16.v v10, (a4)
-; ZVBB-NEXT: vle16.v v11, (a3)
+; ZVBB-NEXT: vle16.v v11, (a4)
+; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: add a1, a4, a1
-; ZVBB-NEXT: vle16.v v12, (a1)
+; ZVBB-NEXT: vle16.v v10, (a1)
; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vi v11, v10, 2
+; ZVBB-NEXT: vslideup.vi v12, v11, 2
; ZVBB-NEXT: vslideup.vi v8, v9, 2
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT: vslideup.vi v8, v11, 4
+; ZVBB-NEXT: vslideup.vi v8, v12, 4
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVBB-NEXT: vslideup.vi v8, v12, 8
+; ZVBB-NEXT: vslideup.vi v8, v10, 8
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
; ZVBB-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 8f6365d35f885..b5eb312bf5e18 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -17,17 +17,17 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
; CHECK-NEXT: srli a2, a2, 2
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vwmaccu.vx v16, a1, v12
-; CHECK-NEXT: vmsne.vi v8, v18, 0
-; CHECK-NEXT: vmsne.vi v9, v16, 0
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
+; CHECK-NEXT: vwmaccu.vx v8, a1, v12
+; CHECK-NEXT: vmsne.vi v12, v10, 0
+; CHECK-NEXT: vmsne.vi v10, v8, 0
; CHECK-NEXT: add a1, a2, a2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v8, a2
+; CHECK-NEXT: vslideup.vx v10, v12, a2
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vsm.v v9, (a0)
+; CHECK-NEXT: vsm.v v10, (a0)
; CHECK-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
store <vscale x 32 x i1> %res, ptr %p
@@ -104,17 +104,17 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs
; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vsrl.vi v8, v6, 1
-; CHECK-NEXT: vand.vi v10, v6, 1
+; CHECK-NEXT: vsrl.vi v4, v6, 1
+; CHECK-NEXT: vand.vi v8, v6, 1
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: vadd.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vadd.vx v4, v4, a2, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
+; CHECK-NEXT: vrgatherei16.vv v8, v24, v4
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v4
; CHECK-NEXT: vs8r.v v24, (a1)
-; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: ret
%res = call <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b)
store <vscale x 16 x i64> %res, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 6aa62c2256925..a41ed52343099 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -19,15 +19,15 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vwmaccu.vx v16, a0, v12
-; CHECK-NEXT: vmsne.vi v8, v18, 0
-; CHECK-NEXT: vmsne.vi v0, v16, 0
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
+; CHECK-NEXT: vmsne.vi v12, v10, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: add a0, a1, a1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v8, a1
+; CHECK-NEXT: vslideup.vx v0, v12, a1
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -57,18 +57,20 @@ define <vscale x 32 x i8> @vector_interleave_nxv32i8_nxv16i8(<vscale x 16 x i8>
; CHECK-LABEL: vector_interleave_nxv32i8_nxv16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v12, a0, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv32i8_nxv16i8:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; ZVBB-NEXT: vwsll.vi v12, v10, 8
-; ZVBB-NEXT: vwaddu.wv v12, v12, v8
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vmv2r.v v12, v10
+; ZVBB-NEXT: vmv2r.v v14, v8
+; ZVBB-NEXT: vwsll.vi v8, v12, 8
+; ZVBB-NEXT: vwaddu.wv v8, v8, v14
; ZVBB-NEXT: ret
%res = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
ret <vscale x 32 x i8> %res
@@ -78,18 +80,20 @@ define <vscale x 16 x i16> @vector_interleave_nxv16i16_nxv8i16(<vscale x 8 x i16
; CHECK-LABEL: vector_interleave_nxv16i16_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v12, a0, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv16i16_nxv8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVBB-NEXT: vwsll.vi v12, v10, 16
-; ZVBB-NEXT: vwaddu.wv v12, v12, v8
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vmv2r.v v12, v10
+; ZVBB-NEXT: vmv2r.v v14, v8
+; ZVBB-NEXT: vwsll.vi v8, v12, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v14
; ZVBB-NEXT: ret
%res = call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
ret <vscale x 16 x i16> %res
@@ -99,19 +103,21 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32(<vscale x 4 x i32>
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v12, a0, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVBB-NEXT: vmv2r.v v12, v10
+; ZVBB-NEXT: vmv2r.v v14, v8
; ZVBB-NEXT: li a0, 32
-; ZVBB-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVBB-NEXT: vwsll.vx v12, v10, a0
-; ZVBB-NEXT: vwaddu.wv v12, v12, v8
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vwsll.vx v8, v12, a0
+; ZVBB-NEXT: vwaddu.wv v8, v8, v14
; ZVBB-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 8 x i32> %res
@@ -361,18 +367,20 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x
; CHECK-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVBB-NEXT: vwsll.vi v10, v9, 16
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vwsll.vi v8, v10, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 8 x bfloat> %res
@@ -417,18 +425,20 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv4f16(<vscale x 4 x half
; CHECK-LABEL: vector_interleave_nxv8f16_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv8f16_nxv4f16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVBB-NEXT: vwsll.vi v10, v9, 16
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vwsll.vi v8, v10, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 8 x half> %res
@@ -438,19 +448,21 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
; CHECK-LABEL: vector_interleave_nxv4f32_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v10, a0, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmaccu.vx v8, a0, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv4f32_nxv2f32:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vmv1r.v v11, v8
; ZVBB-NEXT: li a0, 32
-; ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; ZVBB-NEXT: vwsll.vx v10, v9, a0
-; ZVBB-NEXT: vwaddu.wv v10, v10, v8
-; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: vwsll.vx v8, v10, a0
+; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
%res = call <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
ret <vscale x 4 x float> %res
@@ -460,18 +472,20 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8
; CHECK-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v12, a0, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVBB-NEXT: vwsll.vi v12, v10, 16
-; ZVBB-NEXT: vwaddu.wv v12, v12, v8
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vmv2r.v v12, v10
+; ZVBB-NEXT: vmv2r.v v14, v8
+; ZVBB-NEXT: vwsll.vi v8, v12, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v14
; ZVBB-NEXT: ret
%res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 16 x bfloat> %res
@@ -481,18 +495,20 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x ha
; CHECK-LABEL: vector_interleave_nxv16f16_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v12, a0, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv16f16_nxv8f16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVBB-NEXT: vwsll.vi v12, v10, 16
-; ZVBB-NEXT: vwaddu.wv v12, v12, v8
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vmv2r.v v12, v10
+; ZVBB-NEXT: vmv2r.v v14, v8
+; ZVBB-NEXT: vwsll.vi v8, v12, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v14
; ZVBB-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 16 x half> %res
@@ -502,19 +518,21 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv4f32(<vscale x 4 x flo
; CHECK-LABEL: vector_interleave_nxv8f32_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vwmaccu.vx v12, a0, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vwmaccu.vx v8, a0, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv8f32_nxv4f32:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVBB-NEXT: vmv2r.v v12, v10
+; ZVBB-NEXT: vmv2r.v v14, v8
; ZVBB-NEXT: li a0, 32
-; ZVBB-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVBB-NEXT: vwsll.vx v12, v10, a0
-; ZVBB-NEXT: vwaddu.wv v12, v12, v8
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vwsll.vx v8, v12, a0
+; ZVBB-NEXT: vwaddu.wv v8, v8, v14
; ZVBB-NEXT: ret
%res = call <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 8 x float> %res
@@ -684,15 +702,15 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; ZVBB-NEXT: vzext.vf2 v12, v8
-; ZVBB-NEXT: vmv.v.v v8, v12
+; ZVBB-NEXT: vmv2r.v v12, v8
+; ZVBB-NEXT: vzext.vf2 v8, v12
; ZVBB-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> poison)
ret <vscale x 8 x i32> %res
@@ -709,10 +727,10 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison2(<vscale x 4
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVBB-NEXT: vmv2r.v v12, v8
; ZVBB-NEXT: li a0, 32
-; ZVBB-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; ZVBB-NEXT: vwsll.vx v12, v8, a0
-; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: vwsll.vx v8, v12, a0
; ZVBB-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a)
ret <vscale x 8 x i32> %res
@@ -2062,37 +2080,37 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vmerge.vim v16, v14, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v14, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v22, v14, 1, v0
+; CHECK-NEXT: vmerge.vim v16, v14, 1, v0
; CHECK-NEXT: add a3, a4, a2
; CHECK-NEXT: srli a1, a2, 2
; CHECK-NEXT: add a5, a0, a2
-; CHECK-NEXT: vmv4r.v v24, v16
+; CHECK-NEXT: vmv2r.v v2, v18
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v18, v14, 1, v0
+; CHECK-NEXT: vmerge.vim v20, v14, 1, v0
; CHECK-NEXT: add a6, a3, a2
-; CHECK-NEXT: vmv1r.v v25, v22
+; CHECK-NEXT: vmv1r.v v3, v16
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vim v8, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v26, v18
+; CHECK-NEXT: vmv1r.v v4, v20
; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vmerge.vim v20, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v27, v8
+; CHECK-NEXT: vmerge.vim v22, v14, 1, v0
+; CHECK-NEXT: vmv1r.v v5, v8
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vim v10, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v28, v20
-; CHECK-NEXT: vmv1r.v v18, v23
+; CHECK-NEXT: vmv1r.v v6, v22
+; CHECK-NEXT: vmv1r.v v20, v17
; CHECK-NEXT: add a7, a6, a2
-; CHECK-NEXT: vmv1r.v v29, v10
-; CHECK-NEXT: vmv1r.v v20, v9
+; CHECK-NEXT: vmv1r.v v7, v10
+; CHECK-NEXT: vmv1r.v v22, v9
; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vmerge.vim v30, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v22, v11
+; CHECK-NEXT: vmerge.vim v8, v14, 1, v0
+; CHECK-NEXT: vmv1r.v v24, v11
; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vsseg7e8.v v24, (a4)
-; CHECK-NEXT: vmv1r.v v23, v31
-; CHECK-NEXT: vsseg7e8.v v17, (a0)
+; CHECK-NEXT: vsseg7e8.v v2, (a4)
+; CHECK-NEXT: vmv1r.v v25, v9
+; CHECK-NEXT: vsseg7e8.v v19, (a0)
; CHECK-NEXT: vl1r.v v8, (a6)
; CHECK-NEXT: add a6, a7, a2
; CHECK-NEXT: vl1r.v v10, (a4)
@@ -2156,37 +2174,37 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: add a0, sp, a0
; ZVBB-NEXT: addi a0, a0, 16
; ZVBB-NEXT: csrr a2, vlenb
-; ZVBB-NEXT: vmerge.vim v16, v14, 1, v0
+; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v0, v8
-; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0
+; ZVBB-NEXT: vmerge.vim v16, v14, 1, v0
; ZVBB-NEXT: add a3, a4, a2
; ZVBB-NEXT: srli a1, a2, 2
; ZVBB-NEXT: add a5, a0, a2
-; ZVBB-NEXT: vmv4r.v v24, v16
+; ZVBB-NEXT: vmv2r.v v2, v18
; ZVBB-NEXT: vmv1r.v v0, v9
-; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0
+; ZVBB-NEXT: vmerge.vim v20, v14, 1, v0
; ZVBB-NEXT: add a6, a3, a2
-; ZVBB-NEXT: vmv1r.v v25, v22
+; ZVBB-NEXT: vmv1r.v v3, v16
; ZVBB-NEXT: vmv1r.v v0, v10
; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v26, v18
+; ZVBB-NEXT: vmv1r.v v4, v20
; ZVBB-NEXT: vmv1r.v v0, v11
-; ZVBB-NEXT: vmerge.vim v20, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v27, v8
+; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0
+; ZVBB-NEXT: vmv1r.v v5, v8
; ZVBB-NEXT: vmv1r.v v0, v12
; ZVBB-NEXT: vmerge.vim v10, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v28, v20
-; ZVBB-NEXT: vmv1r.v v18, v23
+; ZVBB-NEXT: vmv1r.v v6, v22
+; ZVBB-NEXT: vmv1r.v v20, v17
; ZVBB-NEXT: add a7, a6, a2
-; ZVBB-NEXT: vmv1r.v v29, v10
-; ZVBB-NEXT: vmv1r.v v20, v9
+; ZVBB-NEXT: vmv1r.v v7, v10
+; ZVBB-NEXT: vmv1r.v v22, v9
; ZVBB-NEXT: vmv1r.v v0, v13
-; ZVBB-NEXT: vmerge.vim v30, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v22, v11
+; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0
+; ZVBB-NEXT: vmv1r.v v24, v11
; ZVBB-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; ZVBB-NEXT: vsseg7e8.v v24, (a4)
-; ZVBB-NEXT: vmv1r.v v23, v31
-; ZVBB-NEXT: vsseg7e8.v v17, (a0)
+; ZVBB-NEXT: vsseg7e8.v v2, (a4)
+; ZVBB-NEXT: vmv1r.v v25, v9
+; ZVBB-NEXT: vsseg7e8.v v19, (a0)
; ZVBB-NEXT: vl1r.v v8, (a6)
; ZVBB-NEXT: add a6, a7, a2
; ZVBB-NEXT: vl1r.v v10, (a4)
@@ -2281,39 +2299,39 @@ define <vscale x 112 x i8> @vector_interleave_nxv112i8_nxv16i8(<vscale x 16 x i8
; RV32-NEXT: vsseg7e8.v v1, (a0)
; RV32-NEXT: vmv1r.v v26, v19
; RV32-NEXT: vsseg7e8.v v21, (a1)
-; RV32-NEXT: vl1r.v v10, (a6)
+; RV32-NEXT: vl1r.v v18, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1r.v v11, (a6)
+; RV32-NEXT: vl1r.v v19, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1r.v v12, (a6)
+; RV32-NEXT: vl1r.v v20, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1r.v v13, (a6)
+; RV32-NEXT: vl1r.v v21, (a6)
; RV32-NEXT: add a6, a3, a2
-; RV32-NEXT: vl1r.v v18, (a6)
+; RV32-NEXT: vl1r.v v10, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1r.v v19, (a6)
-; RV32-NEXT: vl1r.v v16, (a0)
-; RV32-NEXT: vl1r.v v8, (a4)
-; RV32-NEXT: vl1r.v v17, (a3)
-; RV32-NEXT: vl1r.v v9, (a7)
+; RV32-NEXT: vl1r.v v11, (a6)
+; RV32-NEXT: vl1r.v v8, (a0)
+; RV32-NEXT: vl1r.v v16, (a4)
+; RV32-NEXT: vl1r.v v9, (a3)
+; RV32-NEXT: vl1r.v v17, (a7)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a3, 14
; RV32-NEXT: mul a0, a0, a3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 64
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1r.v v20, (a6)
+; RV32-NEXT: vl1r.v v12, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1r.v v21, (a6)
+; RV32-NEXT: vl1r.v v13, (a6)
; RV32-NEXT: add a6, a6, a2
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: vl1r.v v22, (a6)
-; RV32-NEXT: vl1r.v v23, (a1)
+; RV32-NEXT: vl1r.v v14, (a6)
+; RV32-NEXT: vl1r.v v15, (a1)
; RV32-NEXT: add a5, a0, a5
-; RV32-NEXT: vs2r.v v12, (a5)
-; RV32-NEXT: vs4r.v v8, (a2)
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: vs2r.v v20, (a5)
+; RV32-NEXT: vs4r.v v16, (a2)
+; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: vl8r.v v16, (a2)
; RV32-NEXT: vl8r.v v8, (a0)
; RV32-NEXT: addi sp, s0, -80
@@ -2363,39 +2381,39 @@ define <vscale x 112 x i8> @vector_interleave_nxv112i8_nxv16i8(<vscale x 16 x i8
; RV64-NEXT: vsseg7e8.v v1, (a0)
; RV64-NEXT: vmv1r.v v26, v19
; RV64-NEXT: vsseg7e8.v v21, (a1)
-; RV64-NEXT: vl1r.v v10, (a6)
+; RV64-NEXT: vl1r.v v18, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1r.v v11, (a6)
+; RV64-NEXT: vl1r.v v19, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1r.v v12, (a6)
+; RV64-NEXT: vl1r.v v20, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1r.v v13, (a6)
+; RV64-NEXT: vl1r.v v21, (a6)
; RV64-NEXT: add a6, a3, a2
-; RV64-NEXT: vl1r.v v18, (a6)
+; RV64-NEXT: vl1r.v v10, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1r.v v19, (a6)
-; RV64-NEXT: vl1r.v v16, (a0)
-; RV64-NEXT: vl1r.v v8, (a4)
-; RV64-NEXT: vl1r.v v17, (a3)
-; RV64-NEXT: vl1r.v v9, (a7)
+; RV64-NEXT: vl1r.v v11, (a6)
+; RV64-NEXT: vl1r.v v8, (a0)
+; RV64-NEXT: vl1r.v v16, (a4)
+; RV64-NEXT: vl1r.v v9, (a3)
+; RV64-NEXT: vl1r.v v17, (a7)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a3, 14
; RV64-NEXT: mul a0, a0, a3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 64
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1r.v v20, (a6)
+; RV64-NEXT: vl1r.v v12, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1r.v v21, (a6)
+; RV64-NEXT: vl1r.v v13, (a6)
; RV64-NEXT: add a6, a6, a2
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: vl1r.v v22, (a6)
-; RV64-NEXT: vl1r.v v23, (a1)
+; RV64-NEXT: vl1r.v v14, (a6)
+; RV64-NEXT: vl1r.v v15, (a1)
; RV64-NEXT: add a5, a0, a5
-; RV64-NEXT: vs2r.v v12, (a5)
-; RV64-NEXT: vs4r.v v8, (a2)
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: vs2r.v v20, (a5)
+; RV64-NEXT: vs4r.v v16, (a2)
+; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: vl8r.v v16, (a2)
; RV64-NEXT: vl8r.v v8, (a0)
; RV64-NEXT: addi sp, s0, -80
@@ -2445,39 +2463,39 @@ define <vscale x 112 x i8> @vector_interleave_nxv112i8_nxv16i8(<vscale x 16 x i8
; ZVBB-RV32-NEXT: vsseg7e8.v v1, (a0)
; ZVBB-RV32-NEXT: vmv1r.v v26, v19
; ZVBB-RV32-NEXT: vsseg7e8.v v21, (a1)
-; ZVBB-RV32-NEXT: vl1r.v v10, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v18, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1r.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v19, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1r.v v12, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v20, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1r.v v13, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v21, (a6)
; ZVBB-RV32-NEXT: add a6, a3, a2
-; ZVBB-RV32-NEXT: vl1r.v v18, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v10, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1r.v v19, (a6)
-; ZVBB-RV32-NEXT: vl1r.v v16, (a0)
-; ZVBB-RV32-NEXT: vl1r.v v8, (a4)
-; ZVBB-RV32-NEXT: vl1r.v v17, (a3)
-; ZVBB-RV32-NEXT: vl1r.v v9, (a7)
+; ZVBB-RV32-NEXT: vl1r.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v8, (a0)
+; ZVBB-RV32-NEXT: vl1r.v v16, (a4)
+; ZVBB-RV32-NEXT: vl1r.v v9, (a3)
+; ZVBB-RV32-NEXT: vl1r.v v17, (a7)
; ZVBB-RV32-NEXT: csrr a0, vlenb
; ZVBB-RV32-NEXT: li a3, 14
; ZVBB-RV32-NEXT: mul a0, a0, a3
; ZVBB-RV32-NEXT: add a0, sp, a0
; ZVBB-RV32-NEXT: addi a0, a0, 64
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1r.v v20, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v12, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1r.v v21, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v13, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
; ZVBB-RV32-NEXT: slli a2, a2, 3
; ZVBB-RV32-NEXT: add a2, a0, a2
-; ZVBB-RV32-NEXT: vl1r.v v22, (a6)
-; ZVBB-RV32-NEXT: vl1r.v v23, (a1)
+; ZVBB-RV32-NEXT: vl1r.v v14, (a6)
+; ZVBB-RV32-NEXT: vl1r.v v15, (a1)
; ZVBB-RV32-NEXT: add a5, a0, a5
-; ZVBB-RV32-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV32-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV32-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV32-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV32-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV32-NEXT: vs8r.v v8, (a0)
; ZVBB-RV32-NEXT: vl8r.v v16, (a2)
; ZVBB-RV32-NEXT: vl8r.v v8, (a0)
; ZVBB-RV32-NEXT: addi sp, s0, -80
@@ -2527,39 +2545,39 @@ define <vscale x 112 x i8> @vector_interleave_nxv112i8_nxv16i8(<vscale x 16 x i8
; ZVBB-RV64-NEXT: vsseg7e8.v v1, (a0)
; ZVBB-RV64-NEXT: vmv1r.v v26, v19
; ZVBB-RV64-NEXT: vsseg7e8.v v21, (a1)
-; ZVBB-RV64-NEXT: vl1r.v v10, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v18, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1r.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v19, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1r.v v12, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v20, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1r.v v13, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v21, (a6)
; ZVBB-RV64-NEXT: add a6, a3, a2
-; ZVBB-RV64-NEXT: vl1r.v v18, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v10, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1r.v v19, (a6)
-; ZVBB-RV64-NEXT: vl1r.v v16, (a0)
-; ZVBB-RV64-NEXT: vl1r.v v8, (a4)
-; ZVBB-RV64-NEXT: vl1r.v v17, (a3)
-; ZVBB-RV64-NEXT: vl1r.v v9, (a7)
+; ZVBB-RV64-NEXT: vl1r.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v8, (a0)
+; ZVBB-RV64-NEXT: vl1r.v v16, (a4)
+; ZVBB-RV64-NEXT: vl1r.v v9, (a3)
+; ZVBB-RV64-NEXT: vl1r.v v17, (a7)
; ZVBB-RV64-NEXT: csrr a0, vlenb
; ZVBB-RV64-NEXT: li a3, 14
; ZVBB-RV64-NEXT: mul a0, a0, a3
; ZVBB-RV64-NEXT: add a0, sp, a0
; ZVBB-RV64-NEXT: addi a0, a0, 64
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1r.v v20, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v12, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1r.v v21, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v13, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
; ZVBB-RV64-NEXT: slli a2, a2, 3
; ZVBB-RV64-NEXT: add a2, a0, a2
-; ZVBB-RV64-NEXT: vl1r.v v22, (a6)
-; ZVBB-RV64-NEXT: vl1r.v v23, (a1)
+; ZVBB-RV64-NEXT: vl1r.v v14, (a6)
+; ZVBB-RV64-NEXT: vl1r.v v15, (a1)
; ZVBB-RV64-NEXT: add a5, a0, a5
-; ZVBB-RV64-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV64-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV64-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV64-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV64-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV64-NEXT: vs8r.v v8, (a0)
; ZVBB-RV64-NEXT: vl8r.v v16, (a2)
; ZVBB-RV64-NEXT: vl8r.v v8, (a0)
; ZVBB-RV64-NEXT: addi sp, s0, -80
@@ -2615,39 +2633,39 @@ define <vscale x 56 x i16> @vector_interleave_nxv56i16_nxv8i16(<vscale x 8 x i16
; RV32-NEXT: vsseg7e16.v v1, (a0)
; RV32-NEXT: vmv1r.v v26, v19
; RV32-NEXT: vsseg7e16.v v21, (a1)
-; RV32-NEXT: vl1re16.v v10, (a6)
+; RV32-NEXT: vl1re16.v v18, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re16.v v11, (a6)
+; RV32-NEXT: vl1re16.v v19, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re16.v v12, (a6)
+; RV32-NEXT: vl1re16.v v20, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re16.v v13, (a6)
+; RV32-NEXT: vl1re16.v v21, (a6)
; RV32-NEXT: add a6, a3, a2
-; RV32-NEXT: vl1re16.v v18, (a6)
+; RV32-NEXT: vl1re16.v v10, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re16.v v19, (a6)
-; RV32-NEXT: vl1re16.v v16, (a0)
-; RV32-NEXT: vl1re16.v v8, (a4)
-; RV32-NEXT: vl1re16.v v17, (a3)
-; RV32-NEXT: vl1re16.v v9, (a7)
+; RV32-NEXT: vl1re16.v v11, (a6)
+; RV32-NEXT: vl1re16.v v8, (a0)
+; RV32-NEXT: vl1re16.v v16, (a4)
+; RV32-NEXT: vl1re16.v v9, (a3)
+; RV32-NEXT: vl1re16.v v17, (a7)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a3, 14
; RV32-NEXT: mul a0, a0, a3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 64
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re16.v v20, (a6)
+; RV32-NEXT: vl1re16.v v12, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re16.v v21, (a6)
+; RV32-NEXT: vl1re16.v v13, (a6)
; RV32-NEXT: add a6, a6, a2
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: vl1re16.v v22, (a6)
-; RV32-NEXT: vl1re16.v v23, (a1)
+; RV32-NEXT: vl1re16.v v14, (a6)
+; RV32-NEXT: vl1re16.v v15, (a1)
; RV32-NEXT: add a5, a0, a5
-; RV32-NEXT: vs2r.v v12, (a5)
-; RV32-NEXT: vs4r.v v8, (a2)
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: vs2r.v v20, (a5)
+; RV32-NEXT: vs4r.v v16, (a2)
+; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: vl8re16.v v16, (a2)
; RV32-NEXT: vl8re16.v v8, (a0)
; RV32-NEXT: addi sp, s0, -80
@@ -2697,39 +2715,39 @@ define <vscale x 56 x i16> @vector_interleave_nxv56i16_nxv8i16(<vscale x 8 x i16
; RV64-NEXT: vsseg7e16.v v1, (a0)
; RV64-NEXT: vmv1r.v v26, v19
; RV64-NEXT: vsseg7e16.v v21, (a1)
-; RV64-NEXT: vl1re16.v v10, (a6)
+; RV64-NEXT: vl1re16.v v18, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re16.v v11, (a6)
+; RV64-NEXT: vl1re16.v v19, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re16.v v12, (a6)
+; RV64-NEXT: vl1re16.v v20, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re16.v v13, (a6)
+; RV64-NEXT: vl1re16.v v21, (a6)
; RV64-NEXT: add a6, a3, a2
-; RV64-NEXT: vl1re16.v v18, (a6)
+; RV64-NEXT: vl1re16.v v10, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re16.v v19, (a6)
-; RV64-NEXT: vl1re16.v v16, (a0)
-; RV64-NEXT: vl1re16.v v8, (a4)
-; RV64-NEXT: vl1re16.v v17, (a3)
-; RV64-NEXT: vl1re16.v v9, (a7)
+; RV64-NEXT: vl1re16.v v11, (a6)
+; RV64-NEXT: vl1re16.v v8, (a0)
+; RV64-NEXT: vl1re16.v v16, (a4)
+; RV64-NEXT: vl1re16.v v9, (a3)
+; RV64-NEXT: vl1re16.v v17, (a7)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a3, 14
; RV64-NEXT: mul a0, a0, a3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 64
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re16.v v20, (a6)
+; RV64-NEXT: vl1re16.v v12, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re16.v v21, (a6)
+; RV64-NEXT: vl1re16.v v13, (a6)
; RV64-NEXT: add a6, a6, a2
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: vl1re16.v v22, (a6)
-; RV64-NEXT: vl1re16.v v23, (a1)
+; RV64-NEXT: vl1re16.v v14, (a6)
+; RV64-NEXT: vl1re16.v v15, (a1)
; RV64-NEXT: add a5, a0, a5
-; RV64-NEXT: vs2r.v v12, (a5)
-; RV64-NEXT: vs4r.v v8, (a2)
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: vs2r.v v20, (a5)
+; RV64-NEXT: vs4r.v v16, (a2)
+; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: vl8re16.v v16, (a2)
; RV64-NEXT: vl8re16.v v8, (a0)
; RV64-NEXT: addi sp, s0, -80
@@ -2779,39 +2797,39 @@ define <vscale x 56 x i16> @vector_interleave_nxv56i16_nxv8i16(<vscale x 8 x i16
; ZVBB-RV32-NEXT: vsseg7e16.v v1, (a0)
; ZVBB-RV32-NEXT: vmv1r.v v26, v19
; ZVBB-RV32-NEXT: vsseg7e16.v v21, (a1)
-; ZVBB-RV32-NEXT: vl1re16.v v10, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v18, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re16.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v19, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re16.v v12, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v20, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re16.v v13, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v21, (a6)
; ZVBB-RV32-NEXT: add a6, a3, a2
-; ZVBB-RV32-NEXT: vl1re16.v v18, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v10, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re16.v v19, (a6)
-; ZVBB-RV32-NEXT: vl1re16.v v16, (a0)
-; ZVBB-RV32-NEXT: vl1re16.v v8, (a4)
-; ZVBB-RV32-NEXT: vl1re16.v v17, (a3)
-; ZVBB-RV32-NEXT: vl1re16.v v9, (a7)
+; ZVBB-RV32-NEXT: vl1re16.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v8, (a0)
+; ZVBB-RV32-NEXT: vl1re16.v v16, (a4)
+; ZVBB-RV32-NEXT: vl1re16.v v9, (a3)
+; ZVBB-RV32-NEXT: vl1re16.v v17, (a7)
; ZVBB-RV32-NEXT: csrr a0, vlenb
; ZVBB-RV32-NEXT: li a3, 14
; ZVBB-RV32-NEXT: mul a0, a0, a3
; ZVBB-RV32-NEXT: add a0, sp, a0
; ZVBB-RV32-NEXT: addi a0, a0, 64
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re16.v v20, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v12, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re16.v v21, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v13, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
; ZVBB-RV32-NEXT: slli a2, a2, 3
; ZVBB-RV32-NEXT: add a2, a0, a2
-; ZVBB-RV32-NEXT: vl1re16.v v22, (a6)
-; ZVBB-RV32-NEXT: vl1re16.v v23, (a1)
+; ZVBB-RV32-NEXT: vl1re16.v v14, (a6)
+; ZVBB-RV32-NEXT: vl1re16.v v15, (a1)
; ZVBB-RV32-NEXT: add a5, a0, a5
-; ZVBB-RV32-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV32-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV32-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV32-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV32-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV32-NEXT: vs8r.v v8, (a0)
; ZVBB-RV32-NEXT: vl8re16.v v16, (a2)
; ZVBB-RV32-NEXT: vl8re16.v v8, (a0)
; ZVBB-RV32-NEXT: addi sp, s0, -80
@@ -2861,39 +2879,39 @@ define <vscale x 56 x i16> @vector_interleave_nxv56i16_nxv8i16(<vscale x 8 x i16
; ZVBB-RV64-NEXT: vsseg7e16.v v1, (a0)
; ZVBB-RV64-NEXT: vmv1r.v v26, v19
; ZVBB-RV64-NEXT: vsseg7e16.v v21, (a1)
-; ZVBB-RV64-NEXT: vl1re16.v v10, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v18, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re16.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v19, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re16.v v12, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v20, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re16.v v13, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v21, (a6)
; ZVBB-RV64-NEXT: add a6, a3, a2
-; ZVBB-RV64-NEXT: vl1re16.v v18, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v10, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re16.v v19, (a6)
-; ZVBB-RV64-NEXT: vl1re16.v v16, (a0)
-; ZVBB-RV64-NEXT: vl1re16.v v8, (a4)
-; ZVBB-RV64-NEXT: vl1re16.v v17, (a3)
-; ZVBB-RV64-NEXT: vl1re16.v v9, (a7)
+; ZVBB-RV64-NEXT: vl1re16.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v8, (a0)
+; ZVBB-RV64-NEXT: vl1re16.v v16, (a4)
+; ZVBB-RV64-NEXT: vl1re16.v v9, (a3)
+; ZVBB-RV64-NEXT: vl1re16.v v17, (a7)
; ZVBB-RV64-NEXT: csrr a0, vlenb
; ZVBB-RV64-NEXT: li a3, 14
; ZVBB-RV64-NEXT: mul a0, a0, a3
; ZVBB-RV64-NEXT: add a0, sp, a0
; ZVBB-RV64-NEXT: addi a0, a0, 64
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re16.v v20, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v12, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re16.v v21, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v13, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
; ZVBB-RV64-NEXT: slli a2, a2, 3
; ZVBB-RV64-NEXT: add a2, a0, a2
-; ZVBB-RV64-NEXT: vl1re16.v v22, (a6)
-; ZVBB-RV64-NEXT: vl1re16.v v23, (a1)
+; ZVBB-RV64-NEXT: vl1re16.v v14, (a6)
+; ZVBB-RV64-NEXT: vl1re16.v v15, (a1)
; ZVBB-RV64-NEXT: add a5, a0, a5
-; ZVBB-RV64-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV64-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV64-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV64-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV64-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV64-NEXT: vs8r.v v8, (a0)
; ZVBB-RV64-NEXT: vl8re16.v v16, (a2)
; ZVBB-RV64-NEXT: vl8re16.v v8, (a0)
; ZVBB-RV64-NEXT: addi sp, s0, -80
@@ -2949,39 +2967,39 @@ define <vscale x 28 x i32> @vector_interleave_nxv28i32_nxv4i32(<vscale x 4 x i32
; RV32-NEXT: vsseg7e32.v v1, (a0)
; RV32-NEXT: vmv1r.v v26, v19
; RV32-NEXT: vsseg7e32.v v21, (a1)
-; RV32-NEXT: vl1re32.v v10, (a6)
+; RV32-NEXT: vl1re32.v v18, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re32.v v11, (a6)
+; RV32-NEXT: vl1re32.v v19, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re32.v v12, (a6)
+; RV32-NEXT: vl1re32.v v20, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re32.v v13, (a6)
+; RV32-NEXT: vl1re32.v v21, (a6)
; RV32-NEXT: add a6, a3, a2
-; RV32-NEXT: vl1re32.v v18, (a6)
+; RV32-NEXT: vl1re32.v v10, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re32.v v19, (a6)
-; RV32-NEXT: vl1re32.v v16, (a0)
-; RV32-NEXT: vl1re32.v v8, (a4)
-; RV32-NEXT: vl1re32.v v17, (a3)
-; RV32-NEXT: vl1re32.v v9, (a7)
+; RV32-NEXT: vl1re32.v v11, (a6)
+; RV32-NEXT: vl1re32.v v8, (a0)
+; RV32-NEXT: vl1re32.v v16, (a4)
+; RV32-NEXT: vl1re32.v v9, (a3)
+; RV32-NEXT: vl1re32.v v17, (a7)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a3, 14
; RV32-NEXT: mul a0, a0, a3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 64
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re32.v v20, (a6)
+; RV32-NEXT: vl1re32.v v12, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re32.v v21, (a6)
+; RV32-NEXT: vl1re32.v v13, (a6)
; RV32-NEXT: add a6, a6, a2
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: vl1re32.v v22, (a6)
-; RV32-NEXT: vl1re32.v v23, (a1)
+; RV32-NEXT: vl1re32.v v14, (a6)
+; RV32-NEXT: vl1re32.v v15, (a1)
; RV32-NEXT: add a5, a0, a5
-; RV32-NEXT: vs2r.v v12, (a5)
-; RV32-NEXT: vs4r.v v8, (a2)
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: vs2r.v v20, (a5)
+; RV32-NEXT: vs4r.v v16, (a2)
+; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: vl8re32.v v16, (a2)
; RV32-NEXT: vl8re32.v v8, (a0)
; RV32-NEXT: addi sp, s0, -80
@@ -3031,39 +3049,39 @@ define <vscale x 28 x i32> @vector_interleave_nxv28i32_nxv4i32(<vscale x 4 x i32
; RV64-NEXT: vsseg7e32.v v1, (a0)
; RV64-NEXT: vmv1r.v v26, v19
; RV64-NEXT: vsseg7e32.v v21, (a1)
-; RV64-NEXT: vl1re32.v v10, (a6)
+; RV64-NEXT: vl1re32.v v18, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re32.v v11, (a6)
+; RV64-NEXT: vl1re32.v v19, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re32.v v12, (a6)
+; RV64-NEXT: vl1re32.v v20, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re32.v v13, (a6)
+; RV64-NEXT: vl1re32.v v21, (a6)
; RV64-NEXT: add a6, a3, a2
-; RV64-NEXT: vl1re32.v v18, (a6)
+; RV64-NEXT: vl1re32.v v10, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re32.v v19, (a6)
-; RV64-NEXT: vl1re32.v v16, (a0)
-; RV64-NEXT: vl1re32.v v8, (a4)
-; RV64-NEXT: vl1re32.v v17, (a3)
-; RV64-NEXT: vl1re32.v v9, (a7)
+; RV64-NEXT: vl1re32.v v11, (a6)
+; RV64-NEXT: vl1re32.v v8, (a0)
+; RV64-NEXT: vl1re32.v v16, (a4)
+; RV64-NEXT: vl1re32.v v9, (a3)
+; RV64-NEXT: vl1re32.v v17, (a7)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a3, 14
; RV64-NEXT: mul a0, a0, a3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 64
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re32.v v20, (a6)
+; RV64-NEXT: vl1re32.v v12, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re32.v v21, (a6)
+; RV64-NEXT: vl1re32.v v13, (a6)
; RV64-NEXT: add a6, a6, a2
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: vl1re32.v v22, (a6)
-; RV64-NEXT: vl1re32.v v23, (a1)
+; RV64-NEXT: vl1re32.v v14, (a6)
+; RV64-NEXT: vl1re32.v v15, (a1)
; RV64-NEXT: add a5, a0, a5
-; RV64-NEXT: vs2r.v v12, (a5)
-; RV64-NEXT: vs4r.v v8, (a2)
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: vs2r.v v20, (a5)
+; RV64-NEXT: vs4r.v v16, (a2)
+; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: vl8re32.v v16, (a2)
; RV64-NEXT: vl8re32.v v8, (a0)
; RV64-NEXT: addi sp, s0, -80
@@ -3113,39 +3131,39 @@ define <vscale x 28 x i32> @vector_interleave_nxv28i32_nxv4i32(<vscale x 4 x i32
; ZVBB-RV32-NEXT: vsseg7e32.v v1, (a0)
; ZVBB-RV32-NEXT: vmv1r.v v26, v19
; ZVBB-RV32-NEXT: vsseg7e32.v v21, (a1)
-; ZVBB-RV32-NEXT: vl1re32.v v10, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v18, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re32.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v19, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re32.v v12, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v20, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re32.v v13, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v21, (a6)
; ZVBB-RV32-NEXT: add a6, a3, a2
-; ZVBB-RV32-NEXT: vl1re32.v v18, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v10, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re32.v v19, (a6)
-; ZVBB-RV32-NEXT: vl1re32.v v16, (a0)
-; ZVBB-RV32-NEXT: vl1re32.v v8, (a4)
-; ZVBB-RV32-NEXT: vl1re32.v v17, (a3)
-; ZVBB-RV32-NEXT: vl1re32.v v9, (a7)
+; ZVBB-RV32-NEXT: vl1re32.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v8, (a0)
+; ZVBB-RV32-NEXT: vl1re32.v v16, (a4)
+; ZVBB-RV32-NEXT: vl1re32.v v9, (a3)
+; ZVBB-RV32-NEXT: vl1re32.v v17, (a7)
; ZVBB-RV32-NEXT: csrr a0, vlenb
; ZVBB-RV32-NEXT: li a3, 14
; ZVBB-RV32-NEXT: mul a0, a0, a3
; ZVBB-RV32-NEXT: add a0, sp, a0
; ZVBB-RV32-NEXT: addi a0, a0, 64
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re32.v v20, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v12, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re32.v v21, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v13, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
; ZVBB-RV32-NEXT: slli a2, a2, 3
; ZVBB-RV32-NEXT: add a2, a0, a2
-; ZVBB-RV32-NEXT: vl1re32.v v22, (a6)
-; ZVBB-RV32-NEXT: vl1re32.v v23, (a1)
+; ZVBB-RV32-NEXT: vl1re32.v v14, (a6)
+; ZVBB-RV32-NEXT: vl1re32.v v15, (a1)
; ZVBB-RV32-NEXT: add a5, a0, a5
-; ZVBB-RV32-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV32-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV32-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV32-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV32-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV32-NEXT: vs8r.v v8, (a0)
; ZVBB-RV32-NEXT: vl8re32.v v16, (a2)
; ZVBB-RV32-NEXT: vl8re32.v v8, (a0)
; ZVBB-RV32-NEXT: addi sp, s0, -80
@@ -3195,39 +3213,39 @@ define <vscale x 28 x i32> @vector_interleave_nxv28i32_nxv4i32(<vscale x 4 x i32
; ZVBB-RV64-NEXT: vsseg7e32.v v1, (a0)
; ZVBB-RV64-NEXT: vmv1r.v v26, v19
; ZVBB-RV64-NEXT: vsseg7e32.v v21, (a1)
-; ZVBB-RV64-NEXT: vl1re32.v v10, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v18, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re32.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v19, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re32.v v12, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v20, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re32.v v13, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v21, (a6)
; ZVBB-RV64-NEXT: add a6, a3, a2
-; ZVBB-RV64-NEXT: vl1re32.v v18, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v10, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re32.v v19, (a6)
-; ZVBB-RV64-NEXT: vl1re32.v v16, (a0)
-; ZVBB-RV64-NEXT: vl1re32.v v8, (a4)
-; ZVBB-RV64-NEXT: vl1re32.v v17, (a3)
-; ZVBB-RV64-NEXT: vl1re32.v v9, (a7)
+; ZVBB-RV64-NEXT: vl1re32.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v8, (a0)
+; ZVBB-RV64-NEXT: vl1re32.v v16, (a4)
+; ZVBB-RV64-NEXT: vl1re32.v v9, (a3)
+; ZVBB-RV64-NEXT: vl1re32.v v17, (a7)
; ZVBB-RV64-NEXT: csrr a0, vlenb
; ZVBB-RV64-NEXT: li a3, 14
; ZVBB-RV64-NEXT: mul a0, a0, a3
; ZVBB-RV64-NEXT: add a0, sp, a0
; ZVBB-RV64-NEXT: addi a0, a0, 64
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re32.v v20, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v12, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re32.v v21, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v13, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
; ZVBB-RV64-NEXT: slli a2, a2, 3
; ZVBB-RV64-NEXT: add a2, a0, a2
-; ZVBB-RV64-NEXT: vl1re32.v v22, (a6)
-; ZVBB-RV64-NEXT: vl1re32.v v23, (a1)
+; ZVBB-RV64-NEXT: vl1re32.v v14, (a6)
+; ZVBB-RV64-NEXT: vl1re32.v v15, (a1)
; ZVBB-RV64-NEXT: add a5, a0, a5
-; ZVBB-RV64-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV64-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV64-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV64-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV64-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV64-NEXT: vs8r.v v8, (a0)
; ZVBB-RV64-NEXT: vl8re32.v v16, (a2)
; ZVBB-RV64-NEXT: vl8re32.v v8, (a0)
; ZVBB-RV64-NEXT: addi sp, s0, -80
@@ -3282,39 +3300,39 @@ define <vscale x 14 x i64> @vector_interleave_nxv14i64_nxv2i64(<vscale x 2 x i64
; RV32-NEXT: vsseg7e64.v v1, (a0)
; RV32-NEXT: vmv1r.v v26, v19
; RV32-NEXT: vsseg7e64.v v21, (a1)
-; RV32-NEXT: vl1re64.v v10, (a6)
+; RV32-NEXT: vl1re64.v v18, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re64.v v11, (a6)
+; RV32-NEXT: vl1re64.v v19, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re64.v v12, (a6)
+; RV32-NEXT: vl1re64.v v20, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re64.v v13, (a6)
+; RV32-NEXT: vl1re64.v v21, (a6)
; RV32-NEXT: add a6, a3, a2
-; RV32-NEXT: vl1re64.v v18, (a6)
+; RV32-NEXT: vl1re64.v v10, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re64.v v19, (a6)
-; RV32-NEXT: vl1re64.v v16, (a0)
-; RV32-NEXT: vl1re64.v v8, (a4)
-; RV32-NEXT: vl1re64.v v17, (a3)
-; RV32-NEXT: vl1re64.v v9, (a7)
+; RV32-NEXT: vl1re64.v v11, (a6)
+; RV32-NEXT: vl1re64.v v8, (a0)
+; RV32-NEXT: vl1re64.v v16, (a4)
+; RV32-NEXT: vl1re64.v v9, (a3)
+; RV32-NEXT: vl1re64.v v17, (a7)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a3, 14
; RV32-NEXT: mul a0, a0, a3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 64
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re64.v v20, (a6)
+; RV32-NEXT: vl1re64.v v12, (a6)
; RV32-NEXT: add a6, a6, a2
-; RV32-NEXT: vl1re64.v v21, (a6)
+; RV32-NEXT: vl1re64.v v13, (a6)
; RV32-NEXT: add a6, a6, a2
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: vl1re64.v v22, (a6)
-; RV32-NEXT: vl1re64.v v23, (a1)
+; RV32-NEXT: vl1re64.v v14, (a6)
+; RV32-NEXT: vl1re64.v v15, (a1)
; RV32-NEXT: add a5, a0, a5
-; RV32-NEXT: vs2r.v v12, (a5)
-; RV32-NEXT: vs4r.v v8, (a2)
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: vs2r.v v20, (a5)
+; RV32-NEXT: vs4r.v v16, (a2)
+; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: vl8re64.v v16, (a2)
; RV32-NEXT: vl8re64.v v8, (a0)
; RV32-NEXT: addi sp, s0, -80
@@ -3364,39 +3382,39 @@ define <vscale x 14 x i64> @vector_interleave_nxv14i64_nxv2i64(<vscale x 2 x i64
; RV64-NEXT: vsseg7e64.v v1, (a0)
; RV64-NEXT: vmv1r.v v26, v19
; RV64-NEXT: vsseg7e64.v v21, (a1)
-; RV64-NEXT: vl1re64.v v10, (a6)
+; RV64-NEXT: vl1re64.v v18, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re64.v v11, (a6)
+; RV64-NEXT: vl1re64.v v19, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re64.v v12, (a6)
+; RV64-NEXT: vl1re64.v v20, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re64.v v13, (a6)
+; RV64-NEXT: vl1re64.v v21, (a6)
; RV64-NEXT: add a6, a3, a2
-; RV64-NEXT: vl1re64.v v18, (a6)
+; RV64-NEXT: vl1re64.v v10, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re64.v v19, (a6)
-; RV64-NEXT: vl1re64.v v16, (a0)
-; RV64-NEXT: vl1re64.v v8, (a4)
-; RV64-NEXT: vl1re64.v v17, (a3)
-; RV64-NEXT: vl1re64.v v9, (a7)
+; RV64-NEXT: vl1re64.v v11, (a6)
+; RV64-NEXT: vl1re64.v v8, (a0)
+; RV64-NEXT: vl1re64.v v16, (a4)
+; RV64-NEXT: vl1re64.v v9, (a3)
+; RV64-NEXT: vl1re64.v v17, (a7)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a3, 14
; RV64-NEXT: mul a0, a0, a3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 64
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re64.v v20, (a6)
+; RV64-NEXT: vl1re64.v v12, (a6)
; RV64-NEXT: add a6, a6, a2
-; RV64-NEXT: vl1re64.v v21, (a6)
+; RV64-NEXT: vl1re64.v v13, (a6)
; RV64-NEXT: add a6, a6, a2
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: vl1re64.v v22, (a6)
-; RV64-NEXT: vl1re64.v v23, (a1)
+; RV64-NEXT: vl1re64.v v14, (a6)
+; RV64-NEXT: vl1re64.v v15, (a1)
; RV64-NEXT: add a5, a0, a5
-; RV64-NEXT: vs2r.v v12, (a5)
-; RV64-NEXT: vs4r.v v8, (a2)
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: vs2r.v v20, (a5)
+; RV64-NEXT: vs4r.v v16, (a2)
+; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: vl8re64.v v16, (a2)
; RV64-NEXT: vl8re64.v v8, (a0)
; RV64-NEXT: addi sp, s0, -80
@@ -3446,39 +3464,39 @@ define <vscale x 14 x i64> @vector_interleave_nxv14i64_nxv2i64(<vscale x 2 x i64
; ZVBB-RV32-NEXT: vsseg7e64.v v1, (a0)
; ZVBB-RV32-NEXT: vmv1r.v v26, v19
; ZVBB-RV32-NEXT: vsseg7e64.v v21, (a1)
-; ZVBB-RV32-NEXT: vl1re64.v v10, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v18, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re64.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v19, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re64.v v12, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v20, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re64.v v13, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v21, (a6)
; ZVBB-RV32-NEXT: add a6, a3, a2
-; ZVBB-RV32-NEXT: vl1re64.v v18, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v10, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re64.v v19, (a6)
-; ZVBB-RV32-NEXT: vl1re64.v v16, (a0)
-; ZVBB-RV32-NEXT: vl1re64.v v8, (a4)
-; ZVBB-RV32-NEXT: vl1re64.v v17, (a3)
-; ZVBB-RV32-NEXT: vl1re64.v v9, (a7)
+; ZVBB-RV32-NEXT: vl1re64.v v11, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v8, (a0)
+; ZVBB-RV32-NEXT: vl1re64.v v16, (a4)
+; ZVBB-RV32-NEXT: vl1re64.v v9, (a3)
+; ZVBB-RV32-NEXT: vl1re64.v v17, (a7)
; ZVBB-RV32-NEXT: csrr a0, vlenb
; ZVBB-RV32-NEXT: li a3, 14
; ZVBB-RV32-NEXT: mul a0, a0, a3
; ZVBB-RV32-NEXT: add a0, sp, a0
; ZVBB-RV32-NEXT: addi a0, a0, 64
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re64.v v20, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v12, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
-; ZVBB-RV32-NEXT: vl1re64.v v21, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v13, (a6)
; ZVBB-RV32-NEXT: add a6, a6, a2
; ZVBB-RV32-NEXT: slli a2, a2, 3
; ZVBB-RV32-NEXT: add a2, a0, a2
-; ZVBB-RV32-NEXT: vl1re64.v v22, (a6)
-; ZVBB-RV32-NEXT: vl1re64.v v23, (a1)
+; ZVBB-RV32-NEXT: vl1re64.v v14, (a6)
+; ZVBB-RV32-NEXT: vl1re64.v v15, (a1)
; ZVBB-RV32-NEXT: add a5, a0, a5
-; ZVBB-RV32-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV32-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV32-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV32-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV32-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV32-NEXT: vs8r.v v8, (a0)
; ZVBB-RV32-NEXT: vl8re64.v v16, (a2)
; ZVBB-RV32-NEXT: vl8re64.v v8, (a0)
; ZVBB-RV32-NEXT: addi sp, s0, -80
@@ -3528,39 +3546,39 @@ define <vscale x 14 x i64> @vector_interleave_nxv14i64_nxv2i64(<vscale x 2 x i64
; ZVBB-RV64-NEXT: vsseg7e64.v v1, (a0)
; ZVBB-RV64-NEXT: vmv1r.v v26, v19
; ZVBB-RV64-NEXT: vsseg7e64.v v21, (a1)
-; ZVBB-RV64-NEXT: vl1re64.v v10, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v18, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re64.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v19, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re64.v v12, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v20, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re64.v v13, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v21, (a6)
; ZVBB-RV64-NEXT: add a6, a3, a2
-; ZVBB-RV64-NEXT: vl1re64.v v18, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v10, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re64.v v19, (a6)
-; ZVBB-RV64-NEXT: vl1re64.v v16, (a0)
-; ZVBB-RV64-NEXT: vl1re64.v v8, (a4)
-; ZVBB-RV64-NEXT: vl1re64.v v17, (a3)
-; ZVBB-RV64-NEXT: vl1re64.v v9, (a7)
+; ZVBB-RV64-NEXT: vl1re64.v v11, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v8, (a0)
+; ZVBB-RV64-NEXT: vl1re64.v v16, (a4)
+; ZVBB-RV64-NEXT: vl1re64.v v9, (a3)
+; ZVBB-RV64-NEXT: vl1re64.v v17, (a7)
; ZVBB-RV64-NEXT: csrr a0, vlenb
; ZVBB-RV64-NEXT: li a3, 14
; ZVBB-RV64-NEXT: mul a0, a0, a3
; ZVBB-RV64-NEXT: add a0, sp, a0
; ZVBB-RV64-NEXT: addi a0, a0, 64
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re64.v v20, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v12, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
-; ZVBB-RV64-NEXT: vl1re64.v v21, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v13, (a6)
; ZVBB-RV64-NEXT: add a6, a6, a2
; ZVBB-RV64-NEXT: slli a2, a2, 3
; ZVBB-RV64-NEXT: add a2, a0, a2
-; ZVBB-RV64-NEXT: vl1re64.v v22, (a6)
-; ZVBB-RV64-NEXT: vl1re64.v v23, (a1)
+; ZVBB-RV64-NEXT: vl1re64.v v14, (a6)
+; ZVBB-RV64-NEXT: vl1re64.v v15, (a1)
; ZVBB-RV64-NEXT: add a5, a0, a5
-; ZVBB-RV64-NEXT: vs2r.v v12, (a5)
-; ZVBB-RV64-NEXT: vs4r.v v8, (a2)
-; ZVBB-RV64-NEXT: vs8r.v v16, (a0)
+; ZVBB-RV64-NEXT: vs2r.v v20, (a5)
+; ZVBB-RV64-NEXT: vs4r.v v16, (a2)
+; ZVBB-RV64-NEXT: vs8r.v v8, (a0)
; ZVBB-RV64-NEXT: vl8re64.v v16, (a2)
; ZVBB-RV64-NEXT: vl8re64.v v8, (a0)
; ZVBB-RV64-NEXT: addi sp, s0, -80
diff --git a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
index c2ac155304dd5..d27f749880ab4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
@@ -116,8 +116,8 @@ define <vscale x 2 x i64> @vsext_nxv2i8_nxv2i64(<vscale x 2 x i8> %va) {
; CHECK-LABEL: vsext_nxv2i8_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf8 v8, v10
; CHECK-NEXT: ret
%evec = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -127,8 +127,8 @@ define <vscale x 2 x i64> @vzext_nxv2i8_nxv2i64(<vscale x 2 x i8> %va) {
; CHECK-LABEL: vzext_nxv2i8_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf8 v8, v10
; CHECK-NEXT: ret
%evec = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -160,8 +160,8 @@ define <vscale x 4 x i32> @vsext_nxv4i8_nxv4i32(<vscale x 4 x i8> %va) {
; CHECK-LABEL: vsext_nxv4i8_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
%evec = sext <vscale x 4 x i8> %va to <vscale x 4 x i32>
ret <vscale x 4 x i32> %evec
@@ -171,8 +171,8 @@ define <vscale x 4 x i32> @vzext_nxv4i8_nxv4i32(<vscale x 4 x i8> %va) {
; CHECK-LABEL: vzext_nxv4i8_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%evec = zext <vscale x 4 x i8> %va to <vscale x 4 x i32>
ret <vscale x 4 x i32> %evec
@@ -182,8 +182,8 @@ define <vscale x 4 x i64> @vsext_nxv4i8_nxv4i64(<vscale x 4 x i8> %va) {
; CHECK-LABEL: vsext_nxv4i8_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf8 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vsext.vf8 v8, v12
; CHECK-NEXT: ret
%evec = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -193,8 +193,8 @@ define <vscale x 4 x i64> @vzext_nxv4i8_nxv4i64(<vscale x 4 x i8> %va) {
; CHECK-LABEL: vzext_nxv4i8_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf8 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf8 v8, v12
; CHECK-NEXT: ret
%evec = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -204,8 +204,8 @@ define <vscale x 8 x i16> @vsext_nxv8i8_nxv8i16(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vsext_nxv8i8_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%evec = sext <vscale x 8 x i8> %va to <vscale x 8 x i16>
ret <vscale x 8 x i16> %evec
@@ -215,8 +215,8 @@ define <vscale x 8 x i16> @vzext_nxv8i8_nxv8i16(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vzext_nxv8i8_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%evec = zext <vscale x 8 x i8> %va to <vscale x 8 x i16>
ret <vscale x 8 x i16> %evec
@@ -226,8 +226,8 @@ define <vscale x 8 x i32> @vsext_nxv8i8_nxv8i32(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vsext_nxv8i8_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vsext.vf4 v8, v12
; CHECK-NEXT: ret
%evec = sext <vscale x 8 x i8> %va to <vscale x 8 x i32>
ret <vscale x 8 x i32> %evec
@@ -237,8 +237,8 @@ define <vscale x 8 x i32> @vzext_nxv8i8_nxv8i32(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vzext_nxv8i8_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf4 v8, v12
; CHECK-NEXT: ret
%evec = zext <vscale x 8 x i8> %va to <vscale x 8 x i32>
ret <vscale x 8 x i32> %evec
@@ -248,8 +248,8 @@ define <vscale x 8 x i64> @vsext_nxv8i8_nxv8i64(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vsext_nxv8i8_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vsext.vf8 v8, v16
; CHECK-NEXT: ret
%evec = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
@@ -259,8 +259,8 @@ define <vscale x 8 x i64> @vzext_nxv8i8_nxv8i64(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vzext_nxv8i8_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf8 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vzext.vf8 v8, v16
; CHECK-NEXT: ret
%evec = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
@@ -270,8 +270,8 @@ define <vscale x 16 x i16> @vsext_nxv16i8_nxv16i16(<vscale x 16 x i8> %va) {
; CHECK-LABEL: vsext_nxv16i8_nxv16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vsext.vf2 v8, v12
; CHECK-NEXT: ret
%evec = sext <vscale x 16 x i8> %va to <vscale x 16 x i16>
ret <vscale x 16 x i16> %evec
@@ -281,8 +281,8 @@ define <vscale x 16 x i16> @vzext_nxv16i8_nxv16i16(<vscale x 16 x i8> %va) {
; CHECK-LABEL: vzext_nxv16i8_nxv16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%evec = zext <vscale x 16 x i8> %va to <vscale x 16 x i16>
ret <vscale x 16 x i16> %evec
@@ -292,8 +292,8 @@ define <vscale x 16 x i32> @vsext_nxv16i8_nxv16i32(<vscale x 16 x i8> %va) {
; CHECK-LABEL: vsext_nxv16i8_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vsext.vf4 v8, v16
; CHECK-NEXT: ret
%evec = sext <vscale x 16 x i8> %va to <vscale x 16 x i32>
ret <vscale x 16 x i32> %evec
@@ -303,8 +303,8 @@ define <vscale x 16 x i32> @vzext_nxv16i8_nxv16i32(<vscale x 16 x i8> %va) {
; CHECK-LABEL: vzext_nxv16i8_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vzext.vf4 v8, v16
; CHECK-NEXT: ret
%evec = zext <vscale x 16 x i8> %va to <vscale x 16 x i32>
ret <vscale x 16 x i32> %evec
@@ -314,8 +314,8 @@ define <vscale x 32 x i16> @vsext_nxv32i8_nxv32i16(<vscale x 32 x i8> %va) {
; CHECK-LABEL: vsext_nxv32i8_nxv32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vsext.vf2 v8, v16
; CHECK-NEXT: ret
%evec = sext <vscale x 32 x i8> %va to <vscale x 32 x i16>
ret <vscale x 32 x i16> %evec
@@ -325,8 +325,8 @@ define <vscale x 32 x i16> @vzext_nxv32i8_nxv32i16(<vscale x 32 x i8> %va) {
; CHECK-LABEL: vzext_nxv32i8_nxv32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
%evec = zext <vscale x 32 x i8> %va to <vscale x 32 x i16>
ret <vscale x 32 x i16> %evec
@@ -402,8 +402,8 @@ define <vscale x 2 x i64> @vsext_nxv2i16_nxv2i64(<vscale x 2 x i16> %va) {
; CHECK-LABEL: vsext_nxv2i16_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
%evec = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -413,8 +413,8 @@ define <vscale x 2 x i64> @vzext_nxv2i16_nxv2i64(<vscale x 2 x i16> %va) {
; CHECK-LABEL: vzext_nxv2i16_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%evec = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -424,8 +424,8 @@ define <vscale x 4 x i32> @vsext_nxv4i16_nxv4i32(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vsext_nxv4i16_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%evec = sext <vscale x 4 x i16> %va to <vscale x 4 x i32>
ret <vscale x 4 x i32> %evec
@@ -435,8 +435,8 @@ define <vscale x 4 x i32> @vzext_nxv4i16_nxv4i32(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vzext_nxv4i16_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%evec = zext <vscale x 4 x i16> %va to <vscale x 4 x i32>
ret <vscale x 4 x i32> %evec
@@ -446,8 +446,8 @@ define <vscale x 4 x i64> @vsext_nxv4i16_nxv4i64(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vsext_nxv4i16_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vsext.vf4 v8, v12
; CHECK-NEXT: ret
%evec = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -457,8 +457,8 @@ define <vscale x 4 x i64> @vzext_nxv4i16_nxv4i64(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vzext_nxv4i16_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf4 v8, v12
; CHECK-NEXT: ret
%evec = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -468,8 +468,8 @@ define <vscale x 8 x i32> @vsext_nxv8i16_nxv8i32(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vsext_nxv8i16_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vsext.vf2 v8, v12
; CHECK-NEXT: ret
%evec = sext <vscale x 8 x i16> %va to <vscale x 8 x i32>
ret <vscale x 8 x i32> %evec
@@ -479,8 +479,8 @@ define <vscale x 8 x i32> @vzext_nxv8i16_nxv8i32(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vzext_nxv8i16_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%evec = zext <vscale x 8 x i16> %va to <vscale x 8 x i32>
ret <vscale x 8 x i32> %evec
@@ -490,8 +490,8 @@ define <vscale x 8 x i64> @vsext_nxv8i16_nxv8i64(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vsext_nxv8i16_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vsext.vf4 v8, v16
; CHECK-NEXT: ret
%evec = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
@@ -501,8 +501,8 @@ define <vscale x 8 x i64> @vzext_nxv8i16_nxv8i64(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vzext_nxv8i16_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vzext.vf4 v8, v16
; CHECK-NEXT: ret
%evec = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
@@ -512,8 +512,8 @@ define <vscale x 16 x i32> @vsext_nxv16i16_nxv16i32(<vscale x 16 x i16> %va) {
; CHECK-LABEL: vsext_nxv16i16_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vsext.vf2 v8, v16
; CHECK-NEXT: ret
%evec = sext <vscale x 16 x i16> %va to <vscale x 16 x i32>
ret <vscale x 16 x i32> %evec
@@ -523,8 +523,8 @@ define <vscale x 16 x i32> @vzext_nxv16i16_nxv16i32(<vscale x 16 x i16> %va) {
; CHECK-LABEL: vzext_nxv16i16_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
%evec = zext <vscale x 16 x i16> %va to <vscale x 16 x i32>
ret <vscale x 16 x i32> %evec
@@ -556,8 +556,8 @@ define <vscale x 2 x i64> @vsext_nxv2i32_nxv2i64(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vsext_nxv2i32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%evec = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -567,8 +567,8 @@ define <vscale x 2 x i64> @vzext_nxv2i32_nxv2i64(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vzext_nxv2i32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%evec = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -578,8 +578,8 @@ define <vscale x 4 x i64> @vsext_nxv4i32_nxv4i64(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vsext_nxv4i32_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vsext.vf2 v8, v12
; CHECK-NEXT: ret
%evec = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -589,8 +589,8 @@ define <vscale x 4 x i64> @vzext_nxv4i32_nxv4i64(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vzext_nxv4i32_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%evec = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -600,8 +600,8 @@ define <vscale x 8 x i64> @vsext_nxv8i32_nxv8i64(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vsext_nxv8i32_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vsext.vf2 v8, v16
; CHECK-NEXT: ret
%evec = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
@@ -611,8 +611,8 @@ define <vscale x 8 x i64> @vzext_nxv8i32_nxv8i64(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vzext_nxv8i32_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
%evec = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
index 0faebe87f2657..4078b1d2fefdc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
@@ -103,11 +103,11 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v10, v10, v12
+; CHECK-NEXT: vfadd.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -138,11 +138,11 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfadd.vv v12, v12, v16
+; CHECK-NEXT: vfadd.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -173,11 +173,11 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v24, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vfadd.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -397,11 +397,11 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -445,11 +445,11 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -493,11 +493,11 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfadd.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
index 8354a55bd1867..3fe15c123ac04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
@@ -227,11 +227,11 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v12, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vfadd.vv v10, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
@@ -246,11 +246,11 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v12, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v10, v10, v12
+; CHECK-NEXT: vfadd.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -297,11 +297,11 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v16, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfadd.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vfadd.vv v12, v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT: ret
@@ -316,11 +316,11 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v16, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfadd.vv v12, v12, v16
+; CHECK-NEXT: vfadd.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -367,11 +367,11 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vfadd.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT: ret
@@ -386,11 +386,11 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vfadd.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -925,11 +925,11 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -950,11 +950,11 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -1019,11 +1019,11 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -1044,11 +1044,11 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -1113,11 +1113,11 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vfadd.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
@@ -1138,11 +1138,11 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfadd.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index beb56a2645a1c..83f588ce5027d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -1904,11 +1904,11 @@ define <vscale x 8 x double> @vfcopysign_exttrunc_vv_nxv8f64_nxv8f16(<vscale x 8
; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f64_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v20, v16
+; CHECK-NEXT: vfwcvt.f.f.v v24, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v24, v20
+; CHECK-NEXT: vfwcvt.f.f.v v16, v24
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfsgnj.vv v8, v8, v24
+; CHECK-NEXT: vfsgnj.vv v8, v8, v16
; CHECK-NEXT: ret
%e = fpext <vscale x 8 x half> %vs to <vscale x 8 x double>
%r = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> %vm, <vscale x 8 x double> %e)
@@ -1933,11 +1933,11 @@ define <vscale x 8 x double> @vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16(<vscale x
; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v20, v16
+; CHECK-NEXT: vfwcvt.f.f.v v24, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v24, v20
+; CHECK-NEXT: vfwcvt.f.f.v v16, v24
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfsgnjn.vv v8, v8, v24
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v16
; CHECK-NEXT: ret
%n = fneg <vscale x 8 x half> %vs
%eneg = fpext <vscale x 8 x half> %n to <vscale x 8 x double>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
index 217a02d08dead..3ae5a47fe19f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
@@ -103,11 +103,11 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v10, v10, v12
+; CHECK-NEXT: vfdiv.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -138,11 +138,11 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfdiv.vv v12, v12, v16
+; CHECK-NEXT: vfdiv.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -157,11 +157,11 @@ define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfdiv.vv v12, v16, v12
+; CHECK-NEXT: vfdiv.vv v12, v8, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -192,11 +192,11 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v24, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vfdiv.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -426,11 +426,11 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -474,11 +474,11 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -499,11 +499,11 @@ define <vscale x 8 x half> @vfdiv_fv_nxv8f16(<vscale x 8 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12
+; ZVFHMIN-NEXT: vfdiv.vv v12, v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -547,11 +547,11 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
index f3e483f3686a0..80c0ee2e873c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
@@ -189,11 +189,11 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v12, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vfdiv.vv v10, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
@@ -208,11 +208,11 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v12, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfdiv.vv v10, v10, v12
+; CHECK-NEXT: vfdiv.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -259,11 +259,11 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v16, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfdiv.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vfdiv.vv v12, v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT: ret
@@ -278,11 +278,11 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v16, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfdiv.vv v12, v12, v16
+; CHECK-NEXT: vfdiv.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -329,11 +329,11 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vfdiv.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT: ret
@@ -348,11 +348,11 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vfdiv.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -837,11 +837,11 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -862,11 +862,11 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -931,11 +931,11 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -956,11 +956,11 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -1025,11 +1025,11 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
@@ -1050,11 +1050,11 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index c8a8dc98cbbe7..a2869999094a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -300,11 +300,11 @@ define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_commute(<vscale x 4 x bfloat> %va
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v14, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v14, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmadd.vv v12, v14, v10, v0.t
+; CHECK-NEXT: vfmadd.vv v12, v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT: ret
@@ -414,11 +414,11 @@ define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_commute(<vscale x 8 x bfloat> %va
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10, v0.t
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v20, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmadd.vv v16, v20, v12, v0.t
+; CHECK-NEXT: vfmadd.vv v16, v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT: ret
@@ -1185,13 +1185,13 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmset.m v24
-; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vmset.m v8
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: slli a1, a2, 1
; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: vslidedown.vx v0, v8, a2
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
@@ -1201,15 +1201,15 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: mv a3, a2
@@ -1660,11 +1660,11 @@ define <vscale x 4 x half> @vfma_vf_nxv4f16_commute(<vscale x 4 x half> %va, hal
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -1811,11 +1811,11 @@ define <vscale x 8 x half> @vfma_vf_nxv8f16_commute(<vscale x 8 x half> %va, hal
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
@@ -2670,13 +2670,13 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmset.m v24
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmset.m v8
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: slli a1, a2, 1
; ZVFHMIN-NEXT: srli a2, a2, 2
; ZVFHMIN-NEXT: sub a3, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a2
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
@@ -2686,15 +2686,15 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -3496,12 +3496,12 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: mv a3, a2
@@ -3511,7 +3511,7 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv a2, a0
@@ -3529,16 +3529,16 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv a1, a0
@@ -5391,12 +5391,12 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
@@ -5441,16 +5441,16 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5470,16 +5470,16 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16_commute(<vscale x 4 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v8, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5499,16 +5499,16 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vxor.vx v12, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5528,16 +5528,16 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16_unmasked_commute(<vscale x 4 x hal
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vxor.vx v12, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5559,14 +5559,14 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%negb = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
%negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -5586,12 +5586,12 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16_commuted(<vscale x 4 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
@@ -5613,12 +5613,12 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -5640,12 +5640,12 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16_unmasked_commuted(<vscale x 4 x h
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -5666,17 +5666,17 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v10, v9, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5697,17 +5697,17 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_commute(<vscale x 4 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v10, v9, a0, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v8, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v10, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5728,17 +5728,17 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v10, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5759,17 +5759,17 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_unmasked_commute(<vscale x 4 x ha
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v10, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5792,15 +5792,15 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0, v0.t
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5823,15 +5823,15 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0, v0.t
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5855,14 +5855,14 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5886,14 +5886,14 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5916,14 +5916,14 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%negb = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
%negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -5943,12 +5943,12 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16_commuted(<vscale x 4 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
@@ -5970,12 +5970,12 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -5997,12 +5997,12 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16_unmasked_commuted(<vscale x 4 x h
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -6023,16 +6023,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -6052,16 +6052,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_commute(<vscale x 4 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v14, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -6081,16 +6081,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -6110,16 +6110,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_unmasked_commute(<vscale x 4 x ha
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -6141,14 +6141,14 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -6170,12 +6170,12 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
@@ -6197,14 +6197,14 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -6226,14 +6226,14 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -6258,12 +6258,12 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
@@ -6308,16 +6308,16 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6337,16 +6337,16 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16_commute(<vscale x 8 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v8, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6366,16 +6366,16 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vxor.vx v16, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6395,16 +6395,16 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16_unmasked_commute(<vscale x 8 x hal
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vxor.vx v16, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6426,14 +6426,14 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
%negb = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
%negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -6453,12 +6453,12 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16_commuted(<vscale x 8 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
@@ -6480,12 +6480,12 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -6507,12 +6507,12 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16_unmasked_commuted(<vscale x 8 x h
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -6533,17 +6533,17 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6564,17 +6564,17 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_commute(<vscale x 8 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v8, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6595,17 +6595,17 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6626,17 +6626,17 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_unmasked_commute(<vscale x 8 x ha
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6659,15 +6659,15 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t
; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6690,15 +6690,15 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t
; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6722,14 +6722,14 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6753,14 +6753,14 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6783,14 +6783,14 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
%negb = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
%negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -6810,12 +6810,12 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16_commuted(<vscale x 8 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
@@ -6837,12 +6837,12 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -6864,12 +6864,12 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16_unmasked_commuted(<vscale x 8 x h
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -6890,16 +6890,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6919,16 +6919,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_commute(<vscale x 8 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v20, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6948,16 +6948,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6977,16 +6977,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_unmasked_commute(<vscale x 8 x ha
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -7008,14 +7008,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -7037,12 +7037,12 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
@@ -7064,14 +7064,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vxor.vx v10, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v10, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -7093,14 +7093,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vxor.vx v10, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v10, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -7127,8 +7127,8 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
; ZVFHMIN-NEXT: vmv4r.v v4, v12
; ZVFHMIN-NEXT: vmv4r.v v20, v8
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -7234,34 +7234,18 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %v
;
; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v24, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7279,34 +7263,18 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16_unmasked_commute(<vscale x 16 x
;
; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16_unmasked_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v24, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7328,10 +7296,10 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv4r.v v4, v8
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v12, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v16, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
@@ -7357,8 +7325,8 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16_commuted(<vscale x 16 x half> %
; ZVFHMIN-NEXT: vmv4r.v v4, v8
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -7384,12 +7352,12 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16_unmasked(<vscale x 16 x half> %
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v24, v16, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
@@ -7411,12 +7379,12 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16_unmasked_commuted(<vscale x 16
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v24, v16, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
@@ -7439,15 +7407,15 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16(<vscale x 16 x half> %va, half
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v20, v8, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v12, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT: vxor.vx v24, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7470,10 +7438,10 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_commute(<vscale x 16 x half> %v
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v28, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v24, v8, a0, v0.t
; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
@@ -7497,35 +7465,19 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_unmasked(<vscale x 16 x half> %
;
; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v24, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7541,38 +7493,22 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_unmasked_commute(<vscale x 16 x
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfnmadd.vf v8, fa0, v12
; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_unmasked_commute:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+;
+; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_unmasked_commute:
+; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v24, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v16, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7591,35 +7527,20 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat(<vscale x 16 x half>
;
; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: addi a1, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
+; ZVFHMIN-NEXT: vmv4r.v v4, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v4, v16, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7638,35 +7559,20 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_commute(<vscale x 16
;
; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: addi a1, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
+; ZVFHMIN-NEXT: vmv4r.v v4, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v4, v16, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7690,14 +7596,14 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7721,14 +7627,14 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7751,10 +7657,10 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv4r.v v4, v8
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v12, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v12, v16, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
@@ -7780,8 +7686,8 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16_commuted(<vscale x 16 x half> %
; ZVFHMIN-NEXT: vmv4r.v v4, v8
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -7807,12 +7713,12 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16_unmasked(<vscale x 16 x half> %
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v24, v16, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
@@ -7834,12 +7740,12 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16_unmasked_commuted(<vscale x 16
; ZVFHMIN-NEXT: lui a1, 8
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v24, v16, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
@@ -7917,34 +7823,18 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %
;
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v24, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7962,34 +7852,18 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_unmasked_commute(<vscale x 16 x
;
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_unmasked_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v24, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -8010,10 +7884,10 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat(<vscale x 16 x half>
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv4r.v v4, v8
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -8038,14 +7912,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_commute(<vscale x 16
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv4r.v v20, v12
+; ZVFHMIN-NEXT: vmv4r.v v16, v12
; ZVFHMIN-NEXT: vmv4r.v v4, v8
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vxor.vx v20, v8, a0, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
@@ -8070,14 +7944,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v12, v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
@@ -8099,14 +7973,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v12, v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
@@ -8275,18 +8149,18 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT: vmv8r.v v24, v8
-; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN-NEXT: vl8re16.v v16, (a0)
; ZVFHMIN-NEXT: lui a2, 8
-; ZVFHMIN-NEXT: vmset.m v16
+; ZVFHMIN-NEXT: vmset.m v8
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a0, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: sub a4, a1, a0
; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
; ZVFHMIN-NEXT: sltu a3, a1, a4
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v16, v8, a2
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a3, a3, a4
; ZVFHMIN-NEXT: vmv4r.v v8, v16
@@ -9294,35 +9168,35 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: lui a2, 8
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v7
+; ZVFHMIN-NEXT: vmset.m v8
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v16, a2
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a2
; ZVFHMIN-NEXT: slli a0, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: sub a4, a1, a0
; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
; ZVFHMIN-NEXT: sltu a3, a1, a4
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v16, v24, a2
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a3, a3, a4
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -10179,17 +10053,17 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: lui a2, 8
; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v24
+; ZVFHMIN-NEXT: vmset.m v7
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v0, a1
-; ZVFHMIN-NEXT: vxor.vx v8, v16, a2
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a2
; ZVFHMIN-NEXT: slli a1, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: vxor.vx v16, v0, a2
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a2
; ZVFHMIN-NEXT: sub a2, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
; ZVFHMIN-NEXT: sltu a3, a0, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a2, a3, a2
@@ -10197,17 +10071,17 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -10302,10 +10176,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
-; ZVFHMIN-NEXT: vxor.vx v8, v16, a2
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a2
; ZVFHMIN-NEXT: slli a1, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: vxor.vx v16, v24, a2
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a2
; ZVFHMIN-NEXT: sub a2, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
@@ -10316,17 +10190,17 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -10810,35 +10684,35 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: lui a2, 8
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v7
+; ZVFHMIN-NEXT: vmset.m v8
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v16, a2
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a2
; ZVFHMIN-NEXT: slli a0, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: sub a4, a1, a0
; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
; ZVFHMIN-NEXT: sltu a3, a1, a4
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v16, v24, a2
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a3, a3, a4
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -11684,45 +11558,49 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: lui a2, 8
; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v24
+; ZVFHMIN-NEXT: vmset.m v8
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: slli a1, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a2
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a2
; ZVFHMIN-NEXT: sub a2, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
; ZVFHMIN-NEXT: sltu a3, a0, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a2, a3, a2
+; ZVFHMIN-NEXT: vmv4r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v16
+; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 3
-; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: slli a2, a2, 1
-; ZVFHMIN-NEXT: add a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -11747,16 +11625,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
index f6769601f488e..a03629d17e612 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
@@ -161,16 +161,16 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v14, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v9, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vxor.vx v12, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -217,16 +217,16 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vmv.v.x v20, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20
+; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -246,32 +246,16 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vb
%vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -287,34 +271,18 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
;
; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a0
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v4, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v24, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
index 9e6b1f6fefd08..e083e2e1fd072 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
@@ -103,11 +103,11 @@ define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmul.vv v10, v10, v12
+; CHECK-NEXT: vfmul.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -138,11 +138,11 @@ define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmul.vv v12, v12, v16
+; CHECK-NEXT: vfmul.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -173,11 +173,11 @@ define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v24, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmul.vv v16, v16, v24
+; CHECK-NEXT: vfmul.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -397,11 +397,11 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfmul.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -445,11 +445,11 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfmul.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -493,11 +493,11 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfmul.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
index 67d5f1b7c82ef..fc9c798a48bdf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
@@ -251,11 +251,11 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -276,11 +276,11 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfmul.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -345,11 +345,11 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -370,11 +370,11 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfmul.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -439,11 +439,11 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vfmul.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
@@ -464,11 +464,11 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfmul.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index b8ec285b5c34e..e4b467c12791d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -145,12 +145,12 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v12, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -174,14 +174,14 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: vmv.v.x v10, a0
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -204,15 +204,15 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v14
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %vb
%neg2 = fneg <vscale x 8 x half> %va
@@ -234,14 +234,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -264,15 +264,15 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT: vxor.vx v4, v16, a0
; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vc
%neg2 = fneg <vscale x 16 x half> %vb
@@ -294,14 +294,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
index 742083f6f684b..ff612bdcd2725 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
@@ -161,14 +161,14 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
@@ -193,13 +193,13 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v10, a0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %vb
%vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %neg, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -217,14 +217,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vxor.vx v8, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vxor.vx v8, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
@@ -273,14 +273,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vxor.vx v12, v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll
index 3999b97d6e256..9c77a6818bcb2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll
@@ -59,8 +59,8 @@ define <vscale x 4 x float> @vfpext_nxv4f16_nxv4f32(<vscale x 4 x half> %va) str
; CHECK-LABEL: vfpext_nxv4f16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 4 x float> @llvm.experimental.constrained.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x float> %evec
@@ -84,8 +84,8 @@ define <vscale x 8 x float> @vfpext_nxv8f16_nxv8f32(<vscale x 8 x half> %va) str
; CHECK-LABEL: vfpext_nxv8f16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 8 x float> @llvm.experimental.constrained.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x float> %evec
@@ -121,8 +121,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %va) s
; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 2 x double> @llvm.experimental.constrained.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 2 x double> %evec
@@ -133,8 +133,8 @@ define <vscale x 4 x double> @vfpext_nxv4f32_nxv4f64(<vscale x 4 x float> %va) s
; CHECK-LABEL: vfpext_nxv4f32_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 4 x double> @llvm.experimental.constrained.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x double> %evec
@@ -145,8 +145,8 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) s
; CHECK-LABEL: vfpext_nxv8f32_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 8 x double> @llvm.experimental.constrained.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x double> %evec
@@ -207,8 +207,8 @@ define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va)
; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 4 x float> @llvm.experimental.constrained.fpext.nxv4f32.nxv4bf16(<vscale x 4 x bfloat> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x float> %evec
@@ -232,8 +232,8 @@ define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va)
; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 8 x float> @llvm.experimental.constrained.fpext.nxv8f32.nxv8bf16(<vscale x 8 x bfloat> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
index 3b2de0185f90c..557cc3fea1592 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
@@ -63,8 +63,8 @@ define <vscale x 4 x float> @vfpext_nxv4f16_nxv4f32(<vscale x 4 x half> %va) {
; CHECK-LABEL: vfpext_nxv4f16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = fpext <vscale x 4 x half> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
@@ -88,8 +88,8 @@ define <vscale x 8 x float> @vfpext_nxv8f16_nxv8f32(<vscale x 8 x half> %va) {
; CHECK-LABEL: vfpext_nxv8f16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = fpext <vscale x 8 x half> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
@@ -113,8 +113,8 @@ define <vscale x 16 x float> @vfpext_nxv16f16_nxv16f32(<vscale x 16 x half> %va)
; CHECK-LABEL: vfpext_nxv16f16_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
; CHECK-NEXT: ret
%evec = fpext <vscale x 16 x half> %va to <vscale x 16 x float>
ret <vscale x 16 x float> %evec
@@ -137,8 +137,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %va) {
; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
@@ -149,8 +149,8 @@ define <vscale x 4 x double> @vfpext_nxv4f32_nxv4f64(<vscale x 4 x float> %va) {
; CHECK-LABEL: vfpext_nxv4f32_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
@@ -161,8 +161,8 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
; CHECK-LABEL: vfpext_nxv8f32_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
; CHECK-NEXT: ret
%evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
@@ -223,8 +223,8 @@ define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va)
; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: ret
%evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
@@ -248,8 +248,8 @@ define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va)
; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: ret
%evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
@@ -273,8 +273,8 @@ define <vscale x 16 x float> @vfpext_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %
; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: ret
%evec = fpext <vscale x 16 x bfloat> %va to <vscale x 16 x float>
ret <vscale x 16 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
index 6debc47460683..137b616d86fcc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
@@ -60,8 +60,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %a, <v
; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x double> %v
@@ -71,8 +71,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64_unmasked(<vscale x 2 x floa
; CHECK-LABEL: vfpext_nxv2f32_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x double> %v
@@ -84,8 +84,8 @@ define <vscale x 7 x double> @vfpext_nxv7f32_nxv7f64(<vscale x 7 x float> %a, <v
; CHECK-LABEL: vfpext_nxv7f32_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.fpext.nxv7f64.nxv7f32(<vscale x 7 x float> %a, <vscale x 7 x i1> %m, i32 %vl)
ret <vscale x 7 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll
index 6ebdcec4a403c..9e7f4ede29f54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll
@@ -349,8 +349,8 @@ define <vscale x 4 x i32> @vfptosi_nxv4f16_nxv4i32(<vscale x 4 x half> %va) stri
; CHECK-LABEL: vfptosi_nxv4f16_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 4 x i32> @llvm.experimental.constrained.fptosi.nxv4i32.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x i32> %evec
@@ -361,8 +361,8 @@ define <vscale x 4 x i32> @vfptoui_nxv4f16_nxv4i32(<vscale x 4 x half> %va) stri
; CHECK-LABEL: vfptoui_nxv4f16_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 4 x i32> @llvm.experimental.constrained.fptoui.nxv4i32.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x i32> %evec
@@ -471,8 +471,8 @@ define <vscale x 8 x i32> @vfptosi_nxv8f16_nxv8i32(<vscale x 8 x half> %va) stri
; CHECK-LABEL: vfptosi_nxv8f16_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 8 x i32> @llvm.experimental.constrained.fptosi.nxv8i32.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x i32> %evec
@@ -483,8 +483,8 @@ define <vscale x 8 x i32> @vfptoui_nxv8f16_nxv8i32(<vscale x 8 x half> %va) stri
; CHECK-LABEL: vfptoui_nxv8f16_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 8 x i32> @llvm.experimental.constrained.fptoui.nxv8i32.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x i32> %evec
@@ -593,8 +593,8 @@ define <vscale x 16 x i32> @vfptosi_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
; CHECK-LABEL: vfptosi_nxv16f16_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 16 x i32> @llvm.experimental.constrained.fptosi.nxv16i32.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 16 x i32> %evec
@@ -605,8 +605,8 @@ define <vscale x 16 x i32> @vfptoui_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
; CHECK-LABEL: vfptoui_nxv16f16_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 16 x i32> @llvm.experimental.constrained.fptoui.nxv16i32.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
ret <vscale x 16 x i32> %evec
@@ -909,8 +909,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2f32_nxv2i64(<vscale x 2 x float> %va) str
; CHECK-LABEL: vfptosi_nxv2f32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 2 x i64> @llvm.experimental.constrained.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 2 x i64> %evec
@@ -921,8 +921,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2f32_nxv2i64(<vscale x 2 x float> %va) str
; CHECK-LABEL: vfptoui_nxv2f32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 2 x i64> @llvm.experimental.constrained.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 2 x i64> %evec
@@ -1031,8 +1031,8 @@ define <vscale x 4 x i64> @vfptosi_nxv4f32_nxv4i64(<vscale x 4 x float> %va) str
; CHECK-LABEL: vfptosi_nxv4f32_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 4 x i64> @llvm.experimental.constrained.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x i64> %evec
@@ -1043,8 +1043,8 @@ define <vscale x 4 x i64> @vfptoui_nxv4f32_nxv4i64(<vscale x 4 x float> %va) str
; CHECK-LABEL: vfptoui_nxv4f32_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 4 x i64> @llvm.experimental.constrained.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 4 x i64> %evec
@@ -1153,8 +1153,8 @@ define <vscale x 8 x i64> @vfptosi_nxv8f32_nxv8i64(<vscale x 8 x float> %va) str
; CHECK-LABEL: vfptosi_nxv8f32_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 8 x i64> @llvm.experimental.constrained.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x i64> %evec
@@ -1165,8 +1165,8 @@ define <vscale x 8 x i64> @vfptoui_nxv8f32_nxv8i64(<vscale x 8 x float> %va) str
; CHECK-LABEL: vfptoui_nxv8f32_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 8 x i64> @llvm.experimental.constrained.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
ret <vscale x 8 x i64> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index d707b4254d3e1..df7af4d8b1667 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -1246,8 +1246,8 @@ define <vscale x 4 x i32> @vfptosi_nxv4f16_nxv4i32(<vscale x 4 x half> %va) {
; ZVFH-LABEL: vfptosi_nxv4f16_nxv4i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; ZVFH-NEXT: vmv2r.v v8, v10
+; ZVFH-NEXT: vmv1r.v v10, v8
+; ZVFH-NEXT: vfwcvt.rtz.x.f.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfptosi_nxv4f16_nxv4i32:
@@ -1265,8 +1265,8 @@ define <vscale x 4 x i32> @vfptoui_nxv4f16_nxv4i32(<vscale x 4 x half> %va) {
; ZVFH-LABEL: vfptoui_nxv4f16_nxv4i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; ZVFH-NEXT: vmv2r.v v8, v10
+; ZVFH-NEXT: vmv1r.v v10, v8
+; ZVFH-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfptoui_nxv4f16_nxv4i32:
@@ -1424,8 +1424,8 @@ define <vscale x 8 x i32> @vfptosi_nxv8f16_nxv8i32(<vscale x 8 x half> %va) {
; ZVFH-LABEL: vfptosi_nxv8f16_nxv8i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; ZVFH-NEXT: vmv4r.v v8, v12
+; ZVFH-NEXT: vmv2r.v v12, v8
+; ZVFH-NEXT: vfwcvt.rtz.x.f.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfptosi_nxv8f16_nxv8i32:
@@ -1443,8 +1443,8 @@ define <vscale x 8 x i32> @vfptoui_nxv8f16_nxv8i32(<vscale x 8 x half> %va) {
; ZVFH-LABEL: vfptoui_nxv8f16_nxv8i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; ZVFH-NEXT: vmv4r.v v8, v12
+; ZVFH-NEXT: vmv2r.v v12, v8
+; ZVFH-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfptoui_nxv8f16_nxv8i32:
@@ -1602,8 +1602,8 @@ define <vscale x 16 x i32> @vfptosi_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
; ZVFH-LABEL: vfptosi_nxv16f16_nxv16i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT: vfwcvt.rtz.x.f.v v16, v8
-; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: vmv4r.v v16, v8
+; ZVFH-NEXT: vfwcvt.rtz.x.f.v v8, v16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfptosi_nxv16f16_nxv16i32:
@@ -1621,8 +1621,8 @@ define <vscale x 16 x i32> @vfptoui_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
; ZVFH-LABEL: vfptoui_nxv16f16_nxv16i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT: vfwcvt.rtz.xu.f.v v16, v8
-; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: vmv4r.v v16, v8
+; ZVFH-NEXT: vfwcvt.rtz.xu.f.v v8, v16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfptoui_nxv16f16_nxv16i32:
@@ -1990,8 +1990,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2f32_nxv2i64(<vscale x 2 x float> %va) {
; CHECK-LABEL: vfptosi_nxv2f32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -2001,8 +2001,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2f32_nxv2i64(<vscale x 2 x float> %va) {
; CHECK-LABEL: vfptoui_nxv2f32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%evec = fptoui <vscale x 2 x float> %va to <vscale x 2 x i64>
ret <vscale x 2 x i64> %evec
@@ -2102,8 +2102,8 @@ define <vscale x 4 x i64> @vfptosi_nxv4f32_nxv4i64(<vscale x 4 x float> %va) {
; CHECK-LABEL: vfptosi_nxv4f32_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
%evec = fptosi <vscale x 4 x float> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -2113,8 +2113,8 @@ define <vscale x 4 x i64> @vfptoui_nxv4f32_nxv4i64(<vscale x 4 x float> %va) {
; CHECK-LABEL: vfptoui_nxv4f32_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
%evec = fptoui <vscale x 4 x float> %va to <vscale x 4 x i64>
ret <vscale x 4 x i64> %evec
@@ -2214,8 +2214,8 @@ define <vscale x 8 x i64> @vfptosi_nxv8f32_nxv8i64(<vscale x 8 x float> %va) {
; CHECK-LABEL: vfptosi_nxv8f32_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16
; CHECK-NEXT: ret
%evec = fptosi <vscale x 8 x float> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
@@ -2225,8 +2225,8 @@ define <vscale x 8 x i64> @vfptoui_nxv8f32_nxv8i64(<vscale x 8 x float> %va) {
; CHECK-LABEL: vfptoui_nxv8f32_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16
; CHECK-NEXT: ret
%evec = fptoui <vscale x 8 x float> %va to <vscale x 8 x i64>
ret <vscale x 8 x i64> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
index 402a28396a303..f94f709626443 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
@@ -365,8 +365,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f32(<vscale x 2 x float> %va, <vs
; CHECK-LABEL: vfptosi_nxv2i64_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i64> %v
@@ -376,8 +376,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f32_unmasked(<vscale x 2 x float>
; CHECK-LABEL: vfptosi_nxv2i64_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x i64> %v
@@ -491,8 +491,7 @@ define <vscale x 32 x i16> @vfptosi_nxv32i16_nxv32f32(<vscale x 32 x float> %va,
; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: slli a1, a1, 1
@@ -502,15 +501,15 @@ define <vscale x 32 x i16> @vfptosi_nxv32i16_nxv32f32(<vscale x 32 x float> %va,
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.x.f.w v20, v24, v0.t
+; CHECK-NEXT: vfncvt.rtz.x.f.w v28, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB34_2:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i16> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
index 51f7cec96e85c..7aae383049deb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
@@ -365,8 +365,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f32(<vscale x 2 x float> %va, <vs
; CHECK-LABEL: vfptoui_nxv2i64_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i64> %v
@@ -376,8 +376,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f32_unmasked(<vscale x 2 x float>
; CHECK-LABEL: vfptoui_nxv2i64_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x i64> %v
@@ -491,8 +491,7 @@ define <vscale x 32 x i16> @vfptoui_nxv32i16_nxv32f32(<vscale x 32 x float> %va,
; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: slli a1, a1, 1
@@ -502,15 +501,15 @@ define <vscale x 32 x i16> @vfptoui_nxv32i16_nxv32f32(<vscale x 32 x float> %va,
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.xu.f.w v20, v24, v0.t
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v28, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB34_2:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f32(<vscale x 32 x float> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i16> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
index 3adb84f8da20f..5c5abed707257 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -97,8 +97,7 @@ define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double>
; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: sub a3, a0, a1
@@ -107,15 +106,15 @@ define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double>
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 %vl)
ret <vscale x 16 x float> %v
@@ -142,7 +141,7 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: srli a5, a1, 3
; CHECK-NEXT: slli a4, a1, 3
; CHECK-NEXT: slli a3, a1, 1
@@ -156,20 +155,20 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: addi a7, a7, -1
; CHECK-NEXT: and a4, a7, a4
; CHECK-NEXT: srli a7, a1, 2
-; CHECK-NEXT: vl8re64.v v24, (a6)
-; CHECK-NEXT: vslidedown.vx v16, v0, a7
+; CHECK-NEXT: vl8re64.v v8, (a6)
+; CHECK-NEXT: vslidedown.vx v24, v0, a7
; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a5
-; CHECK-NEXT: vslidedown.vx v0, v16, a5
+; CHECK-NEXT: vslidedown.vx v0, v24, a5
; CHECK-NEXT: bltu a0, a1, .LBB8_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB8_2:
; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB8_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a3
@@ -180,9 +179,9 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v12, v24, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t
; CHECK-NEXT: bltu a2, a1, .LBB8_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a1
@@ -192,9 +191,10 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v8, v24, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
index fbb14ab43e075..e6493ea229a50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
@@ -103,11 +103,11 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v10, v10, v12
+; CHECK-NEXT: vfsub.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -138,11 +138,11 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v12, v12, v16
+; CHECK-NEXT: vfsub.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -157,11 +157,11 @@ define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v12, v16, v12
+; CHECK-NEXT: vfsub.vv v12, v8, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -192,11 +192,11 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v24, a0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vfsub.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -416,11 +416,11 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfsub.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -464,11 +464,11 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -489,11 +489,11 @@ define <vscale x 8 x half> @vfsub_fv_nxv8f16(<vscale x 8 x half> %va, half %b) s
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT: vfsub.vv v12, v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -537,11 +537,11 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vmv.v.x v24, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index 2dd4fea82f9d6..70317e9d55cc7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -189,11 +189,11 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v12, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vfsub.vv v10, v10, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
@@ -208,11 +208,11 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmv.v.x v12, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfsub.vv v10, v10, v12
+; CHECK-NEXT: vfsub.vv v10, v10, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
@@ -259,11 +259,11 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v16, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vfsub.vv v12, v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT: ret
@@ -278,11 +278,11 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v16, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v12, v12, v16
+; CHECK-NEXT: vfsub.vv v12, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
@@ -329,11 +329,11 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vfsub.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT: ret
@@ -348,11 +348,11 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.h a1, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vmv.v.x v24, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vfsub.vv v16, v16, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
@@ -837,11 +837,11 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v10, v10, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: ret
@@ -862,11 +862,11 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12
+; ZVFHMIN-NEXT: vfsub.vv v10, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
@@ -931,11 +931,11 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
@@ -956,11 +956,11 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
@@ -1025,11 +1025,11 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: ret
@@ -1050,11 +1050,11 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
index f7d287a088cc3..8d41514161c61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
@@ -88,8 +88,9 @@ define <vscale x 2 x double> @vfwadd_vv_nxv2f64(<vscale x 2 x float> %va, <vscal
; CHECK-LABEL: vfwadd_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwadd.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vfwadd.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
%vd = fpext <vscale x 2 x float> %vb to <vscale x 2 x double>
@@ -101,8 +102,8 @@ define <vscale x 2 x double> @vfwadd_vf_nxv2f64(<vscale x 2 x float> %va, float
; CHECK-LABEL: vfwadd_vf_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwadd.vf v10, v8, fa0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -116,8 +117,8 @@ define <vscale x 2 x double> @vfwadd_vf_nxv2f64_2(<vscale x 2 x float> %va, floa
; CHECK-LABEL: vfwadd_vf_nxv2f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwadd.vf v10, v8, fa0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 2 x double> poison, double %fpext, i32 0
@@ -168,8 +169,9 @@ define <vscale x 4 x double> @vfwadd_vv_nxv4f64(<vscale x 4 x float> %va, <vscal
; CHECK-LABEL: vfwadd_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwadd.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vfwadd.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
%vd = fpext <vscale x 4 x float> %vb to <vscale x 4 x double>
@@ -181,8 +183,8 @@ define <vscale x 4 x double> @vfwadd_vf_nxv4f64(<vscale x 4 x float> %va, float
; CHECK-LABEL: vfwadd_vf_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwadd.vf v12, v8, fa0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -196,8 +198,8 @@ define <vscale x 4 x double> @vfwadd_vf_nxv4f64_2(<vscale x 4 x float> %va, floa
; CHECK-LABEL: vfwadd_vf_nxv4f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwadd.vf v12, v8, fa0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 4 x double> poison, double %fpext, i32 0
@@ -248,8 +250,9 @@ define <vscale x 8 x double> @vfwadd_vv_nxv8f64(<vscale x 8 x float> %va, <vscal
; CHECK-LABEL: vfwadd_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwadd.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vfwadd.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
%vd = fpext <vscale x 8 x float> %vb to <vscale x 8 x double>
@@ -261,8 +264,8 @@ define <vscale x 8 x double> @vfwadd_vf_nxv8f64(<vscale x 8 x float> %va, float
; CHECK-LABEL: vfwadd_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwadd.vf v16, v8, fa0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -276,8 +279,8 @@ define <vscale x 8 x double> @vfwadd_vf_nxv8f64_2(<vscale x 8 x float> %va, floa
; CHECK-LABEL: vfwadd_vf_nxv8f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwadd.vf v16, v8, fa0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 8 x double> poison, double %fpext, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
index d980803cb3897..df8ece342d9ee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
@@ -115,11 +115,12 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16(
define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwadd.vv v10, v8, v9
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16(
@@ -166,11 +167,12 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16(
define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwadd.vv v12, v8, v10
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16(
@@ -217,11 +219,12 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16(
define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwadd.vv v16, v8, v12
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16(
@@ -319,11 +322,12 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32(
define <vscale x 2 x double> @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwadd.vv v10, v8, v9
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32(
@@ -370,11 +374,12 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(
define <vscale x 4 x double> @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwadd.vv v12, v8, v10
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(
@@ -421,11 +426,12 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32(
define <vscale x 8 x double> @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwadd.vv v16, v8, v12
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32(
@@ -574,11 +580,11 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwadd.vf v10, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
@@ -625,11 +631,11 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwadd.vf v12, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
@@ -676,11 +682,11 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwadd.vf v16, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
@@ -778,11 +784,11 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwadd.vf v10, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
@@ -829,11 +835,11 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwadd.vf v12, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
@@ -880,11 +886,11 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwadd.vf v16, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
index 8eb2a2c0391b5..8e97ba581d55c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
@@ -1289,11 +1289,11 @@ entry:
define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwadd.wv v12, v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
@@ -1308,11 +1308,11 @@ entry:
define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwadd.wv v16, v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
@@ -1346,11 +1346,11 @@ entry:
define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwadd.wv v12, v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
@@ -1365,11 +1365,11 @@ entry:
define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwadd.wv v16, v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
@@ -1384,11 +1384,11 @@ entry:
define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwadd.wv v24, v16, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v16, v24
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
index 4f03188cf3806..0ac2d472c70fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
@@ -102,8 +102,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16(<vscale x 4
; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(
@@ -145,8 +145,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16(<vscale x 8
; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16(
@@ -188,8 +188,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16(
@@ -274,8 +274,8 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32(<vscale x 2
; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32(
@@ -317,8 +317,8 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32(<vscale x 4
; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32(
@@ -360,8 +360,8 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32(<vscale x 8
; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
index 3c1e07b4a5ef4..cb6db0de733a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
@@ -142,8 +142,8 @@ define <vscale x 8 x half> @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8(
@@ -185,8 +185,8 @@ define <vscale x 16 x half> @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8(<vscale x 1
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8(
@@ -228,8 +228,8 @@ define <vscale x 32 x half> @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8(<vscale x 3
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8(
@@ -357,8 +357,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16(<vscale x 4
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16(
@@ -400,8 +400,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16(<vscale x 8
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16(
@@ -443,8 +443,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16(
@@ -529,8 +529,8 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32(<vscale x 2
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32(
@@ -572,8 +572,8 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32(<vscale x 4
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32(
@@ -615,8 +615,8 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32(<vscale x 8
; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
index 17ea8f50a6943..59dacdaa82ea3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
@@ -142,8 +142,8 @@ define <vscale x 8 x half> @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8(
@@ -185,8 +185,8 @@ define <vscale x 16 x half> @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8(
@@ -228,8 +228,8 @@ define <vscale x 32 x half> @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8(
@@ -357,8 +357,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16(<vscale x 4
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16(
@@ -400,8 +400,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16(<vscale x 8
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16(
@@ -443,8 +443,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16(<vscale
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16(
@@ -529,8 +529,8 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32(
@@ -572,8 +572,8 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32(
@@ -615,8 +615,8 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
index f9f426cd3c9b6..3d1fdda3af3ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
@@ -101,8 +101,8 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16(
@@ -145,8 +145,8 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16(
@@ -189,8 +189,8 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16(<vscale
; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16(
@@ -277,8 +277,8 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32(
@@ -321,8 +321,8 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32(
@@ -365,8 +365,8 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
index 61d2ad5bf892a..8eed12f2d2e72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
@@ -101,8 +101,8 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16(
@@ -145,8 +145,8 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16(
@@ -189,8 +189,8 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16(<vscal
; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16(
@@ -277,8 +277,8 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32(
@@ -321,8 +321,8 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32(
@@ -365,8 +365,8 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x
; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
index 8b545585c56d0..da52e05a45f11 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
@@ -106,11 +106,11 @@ declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16(
define <vscale x 4 x i32> @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16(
@@ -153,11 +153,11 @@ declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16(
define <vscale x 8 x i32> @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16(
@@ -200,11 +200,11 @@ declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16(
define <vscale x 16 x i32> @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v16, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16(
@@ -294,11 +294,11 @@ declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32(
define <vscale x 2 x i64> @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32(
@@ -341,11 +341,11 @@ declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32(
define <vscale x 4 x i64> @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32(
@@ -388,11 +388,11 @@ declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32(
define <vscale x 8 x i64> @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.x.f.v v16, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
index 476e2398c479d..01a82998d617c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
@@ -106,11 +106,11 @@ declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16(
define <vscale x 4 x i32> @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16(
@@ -153,11 +153,11 @@ declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16(
define <vscale x 8 x i32> @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16(
@@ -200,11 +200,11 @@ declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16(
define <vscale x 16 x i32> @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v16, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16(
@@ -294,11 +294,11 @@ declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32(
define <vscale x 2 x i64> @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32(
@@ -341,11 +341,11 @@ declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32(
define <vscale x 4 x i64> @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32(
@@ -388,11 +388,11 @@ declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32(
define <vscale x 8 x i64> @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.xu.f.v v16, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwcvt.xu.f.v v8, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
index 1cc2b21f87a4c..d232f83d07c10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
@@ -101,8 +101,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16(<vscale
; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv4f32.nxv4bf16(
@@ -145,8 +145,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16(<vscale
; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv8f32.nxv8bf16(
@@ -189,8 +189,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16(<vsc
; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv16f32.nxv16bf16(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
index b1e36634e481a..4ef7ea5b52a75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -414,11 +414,11 @@ define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -437,11 +437,11 @@ define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v10
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -461,11 +461,11 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -487,11 +487,11 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v10
+; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v10
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -518,11 +518,11 @@ define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -541,11 +541,11 @@ define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v12
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -565,11 +565,11 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -591,11 +591,11 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v12
+; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -623,12 +623,10 @@ define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscal
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: vmv4r.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
%bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
@@ -647,11 +645,11 @@ define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv4r.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfmadd.vv v8, v0, v16
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
@@ -697,11 +695,11 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v24, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v0, v16
+; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
index 94b80075ac14c..36f8e99b27383 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
@@ -338,11 +338,11 @@ define <vscale x 4 x float> @vmfsac_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x
; ZVFHMIN-LABEL: vmfsac_vv_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfmsub.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -362,11 +362,11 @@ define <vscale x 4 x float> @vmfsac_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN-LABEL: vmfsac_vv_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfmsub.vv v8, v14, v10
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -387,11 +387,11 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfmsub.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -412,14 +412,14 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_commute(<vscale x 4 x half> %a, h
;
; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v12, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v14, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfmsub.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -442,11 +442,11 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v8, v14, v10
+; ZVFHMIN-NEXT: vfmsub.vv v8, v12, v10
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -472,11 +472,11 @@ define <vscale x 8 x float> @vmfsac_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x
; ZVFHMIN-LABEL: vmfsac_vv_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfmsub.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -496,11 +496,11 @@ define <vscale x 8 x float> @vmfsac_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN-LABEL: vmfsac_vv_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfmsub.vv v8, v20, v12
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -521,11 +521,11 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfmsub.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -546,14 +546,14 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_commute(<vscale x 8 x half> %a, h
;
; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT: vmv2r.v v16, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v20, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfmsub.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -576,11 +576,11 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmsub.vv v8, v20, v12
+; ZVFHMIN-NEXT: vfmsub.vv v8, v16, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
index 8cc8c5cffca6b..41f2c26c15808 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
@@ -51,8 +51,9 @@ define <vscale x 2 x double> @vfwmul_vv_nxv2f64(<vscale x 2 x float> %va, <vscal
; CHECK-LABEL: vfwmul_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwmul.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vfwmul.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
%vd = fpext <vscale x 2 x float> %vb to <vscale x 2 x double>
@@ -64,8 +65,8 @@ define <vscale x 2 x double> @vfwmul_vf_nxv2f64(<vscale x 2 x float> %va, float
; CHECK-LABEL: vfwmul_vf_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwmul.vf v10, v8, fa0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -79,8 +80,8 @@ define <vscale x 2 x double> @vfwmul_vf_nxv2f64_2(<vscale x 2 x float> %va, floa
; CHECK-LABEL: vfwmul_vf_nxv2f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwmul.vf v10, v8, fa0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 2 x double> poison, double %fpext, i32 0
@@ -94,8 +95,9 @@ define <vscale x 4 x double> @vfwmul_vv_nxv4f64(<vscale x 4 x float> %va, <vscal
; CHECK-LABEL: vfwmul_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwmul.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vfwmul.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
%vd = fpext <vscale x 4 x float> %vb to <vscale x 4 x double>
@@ -107,8 +109,8 @@ define <vscale x 4 x double> @vfwmul_vf_nxv4f64(<vscale x 4 x float> %va, float
; CHECK-LABEL: vfwmul_vf_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwmul.vf v12, v8, fa0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -122,8 +124,8 @@ define <vscale x 4 x double> @vfwmul_vf_nxv4f64_2(<vscale x 4 x float> %va, floa
; CHECK-LABEL: vfwmul_vf_nxv4f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwmul.vf v12, v8, fa0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 4 x double> poison, double %fpext, i32 0
@@ -137,8 +139,9 @@ define <vscale x 8 x double> @vfwmul_vv_nxv8f64(<vscale x 8 x float> %va, <vscal
; CHECK-LABEL: vfwmul_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwmul.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vfwmul.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
%vd = fpext <vscale x 8 x float> %vb to <vscale x 8 x double>
@@ -150,8 +153,8 @@ define <vscale x 8 x double> @vfwmul_vf_nxv8f64(<vscale x 8 x float> %va, float
; CHECK-LABEL: vfwmul_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwmul.vf v16, v8, fa0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -165,8 +168,8 @@ define <vscale x 8 x double> @vfwmul_vf_nxv8f64_2(<vscale x 8 x float> %va, floa
; CHECK-LABEL: vfwmul_vf_nxv8f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwmul.vf v16, v8, fa0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 8 x double> poison, double %fpext, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
index dae29efc75bf8..d16c418cbf770 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
@@ -115,11 +115,12 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16(
define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwmul.vv v10, v8, v9
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16(
@@ -166,11 +167,12 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16(
define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwmul.vv v12, v8, v10
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16(
@@ -217,11 +219,12 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16(
define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwmul.vv v16, v8, v12
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16(
@@ -319,11 +322,12 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32(
define <vscale x 2 x double> @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwmul.vv v10, v8, v9
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32(
@@ -370,11 +374,12 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(
define <vscale x 4 x double> @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwmul.vv v12, v8, v10
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(
@@ -421,11 +426,12 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32(
define <vscale x 8 x double> @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwmul.vv v16, v8, v12
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32(
@@ -574,11 +580,11 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwmul.vf v10, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
@@ -625,11 +631,11 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwmul.vf v12, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
@@ -676,11 +682,11 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwmul.vf v16, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
@@ -778,11 +784,11 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwmul.vf v10, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
@@ -829,11 +835,11 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwmul.vf v12, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
@@ -880,11 +886,11 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwmul.vf v16, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
index e447babb03723..fa328356ab585 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
@@ -299,11 +299,11 @@ define <vscale x 4 x float> @vfnmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale
; ZVFHMIN-LABEL: vfnmacc_vv_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -324,11 +324,11 @@ define <vscale x 4 x float> @vfnmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN-LABEL: vfnmacc_vv_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v12, v14, v10
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v14, v10
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -350,11 +350,11 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -377,14 +377,14 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_commute(<vscale x 4 x half> %a,
;
; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v12, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v14, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -408,11 +408,11 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v8, v14, v10
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v12, v10
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -439,11 +439,11 @@ define <vscale x 8 x float> @vfnmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale
; ZVFHMIN-LABEL: vfnmacc_vv_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -464,11 +464,11 @@ define <vscale x 8 x float> @vfnmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN-LABEL: vfnmacc_vv_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v16, v20, v12
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v20, v12
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -490,11 +490,11 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -517,14 +517,14 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_commute(<vscale x 8 x half> %a,
;
; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT: vmv2r.v v16, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v20, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -548,11 +548,11 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v8, v20, v12
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v16, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -580,12 +580,10 @@ define <vscale x 16 x float> @vfnmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vsca
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: vmv4r.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v24, v8, v16, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
%bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
@@ -606,11 +604,11 @@ define <vscale x 16 x float> @vfnmacc_vv_nxv16f32_unmasked(<vscale x 16 x half>
; ZVFHMIN-LABEL: vfnmacc_vv_nxv16f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv4r.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v24, v0, v16
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v0, v16
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
@@ -659,14 +657,14 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_commute(<vscale x 16 x half> %
;
; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v4, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT: vmv4r.v v24, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v4, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v24, v8, v16, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -690,11 +688,11 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_unmasked(<vscale x 16 x half>
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v24, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmadd.vv v8, v0, v16
+; ZVFHMIN-NEXT: vfnmadd.vv v8, v24, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
index ea705b2732edf..cc0ae35780a60 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
@@ -287,11 +287,11 @@ define <vscale x 4 x float> @vfnmsac_vv_nxv4f32(<vscale x 4 x half> %a, <vscale
; ZVFHMIN-LABEL: vfnmsac_vv_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -311,11 +311,11 @@ define <vscale x 4 x float> @vfnmsac_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN-LABEL: vfnmsac_vv_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v12, v14, v10
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v14, v10
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -336,11 +336,11 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -361,14 +361,14 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_commute(<vscale x 4 x half> %a,
;
; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v12, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v14, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v12, v14, v10, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v12
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -391,11 +391,11 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT: vmv.v.x v14, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v8, v14, v10
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v12, v10
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -421,11 +421,11 @@ define <vscale x 8 x float> @vfnmsac_vv_nxv8f32(<vscale x 8 x half> %a, <vscale
; ZVFHMIN-LABEL: vfnmsac_vv_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -445,11 +445,11 @@ define <vscale x 8 x float> @vfnmsac_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN-LABEL: vfnmsac_vv_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv2r.v v16, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v16, v20, v12
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v20, v12
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -470,11 +470,11 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -495,14 +495,14 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_commute(<vscale x 8 x half> %a,
;
; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT: vmv2r.v v16, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v20, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v16, v20, v12, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v16
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -525,11 +525,11 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vmv.v.x v20, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v8, v20, v12
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v16, v12
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -556,12 +556,10 @@ define <vscale x 16 x float> @vfnmsac_vv_nxv16f32(<vscale x 16 x half> %a, <vsca
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: vmv4r.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v24, v8, v16, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
%bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
@@ -581,11 +579,11 @@ define <vscale x 16 x float> @vfnmsac_vv_nxv16f32_unmasked(<vscale x 16 x half>
; ZVFHMIN-LABEL: vfnmsac_vv_nxv16f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv4r.v v24, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v24, v0, v16
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v0, v16
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
%bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
@@ -631,14 +629,14 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_commute(<vscale x 16 x half> %
;
; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v4, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT: vmv4r.v v24, v8
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vmv.v.x v4, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v24, v8, v16, v0.t
-; ZVFHMIN-NEXT: vmv.v.v v8, v24
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -661,11 +659,11 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_unmasked(<vscale x 16 x half>
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v24, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfnmsub.vv v8, v0, v16
+; ZVFHMIN-NEXT: vfnmsub.vv v8, v24, v16
; ZVFHMIN-NEXT: ret
%elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
index d0cb64d986661..9393cbd4ccafc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
@@ -88,8 +88,9 @@ define <vscale x 2 x double> @vfwsub_vv_nxv2f64(<vscale x 2 x float> %va, <vscal
; CHECK-LABEL: vfwsub_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwsub.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vfwsub.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
%vd = fpext <vscale x 2 x float> %vb to <vscale x 2 x double>
@@ -101,8 +102,8 @@ define <vscale x 2 x double> @vfwsub_vf_nxv2f64(<vscale x 2 x float> %va, float
; CHECK-LABEL: vfwsub_vf_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwsub.vf v10, v8, fa0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -116,8 +117,8 @@ define <vscale x 2 x double> @vfwsub_vf_nxv2f64_2(<vscale x 2 x float> %va, floa
; CHECK-LABEL: vfwsub_vf_nxv2f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwsub.vf v10, v8, fa0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 2 x double> poison, double %fpext, i32 0
@@ -168,8 +169,9 @@ define <vscale x 4 x double> @vfwsub_vv_nxv4f64(<vscale x 4 x float> %va, <vscal
; CHECK-LABEL: vfwsub_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwsub.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vfwsub.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
%vd = fpext <vscale x 4 x float> %vb to <vscale x 4 x double>
@@ -181,8 +183,8 @@ define <vscale x 4 x double> @vfwsub_vf_nxv4f64(<vscale x 4 x float> %va, float
; CHECK-LABEL: vfwsub_vf_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwsub.vf v12, v8, fa0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -196,8 +198,8 @@ define <vscale x 4 x double> @vfwsub_vf_nxv4f64_2(<vscale x 4 x float> %va, floa
; CHECK-LABEL: vfwsub_vf_nxv4f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwsub.vf v12, v8, fa0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 4 x double> poison, double %fpext, i32 0
@@ -248,8 +250,9 @@ define <vscale x 8 x double> @vfwsub_vv_nxv8f64(<vscale x 8 x float> %va, <vscal
; CHECK-LABEL: vfwsub_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwsub.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vfwsub.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
%vd = fpext <vscale x 8 x float> %vb to <vscale x 8 x double>
@@ -261,8 +264,8 @@ define <vscale x 8 x double> @vfwsub_vf_nxv8f64(<vscale x 8 x float> %va, float
; CHECK-LABEL: vfwsub_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwsub.vf v16, v8, fa0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -276,8 +279,8 @@ define <vscale x 8 x double> @vfwsub_vf_nxv8f64_2(<vscale x 8 x float> %va, floa
; CHECK-LABEL: vfwsub_vf_nxv8f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwsub.vf v16, v8, fa0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0
; CHECK-NEXT: ret
%fpext = fpext float %b to double
%head = insertelement <vscale x 8 x double> poison, double %fpext, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
index 4f263c63e545b..703a69d048311 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
@@ -115,11 +115,12 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16(
define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwsub.vv v10, v8, v9
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16(
@@ -166,11 +167,12 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16(
define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwsub.vv v12, v8, v10
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16(
@@ -217,11 +219,12 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16(
define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwsub.vv v16, v8, v12
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16(
@@ -319,11 +322,12 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32(
define <vscale x 2 x double> @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwsub.vv v10, v8, v9
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32(
@@ -370,11 +374,12 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(
define <vscale x 4 x double> @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwsub.vv v12, v8, v10
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(
@@ -421,11 +426,12 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32(
define <vscale x 8 x double> @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwsub.vv v16, v8, v12
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32(
@@ -574,11 +580,11 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwsub.vf v10, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
@@ -625,11 +631,11 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwsub.vf v12, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
@@ -676,11 +682,11 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwsub.vf v16, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
@@ -778,11 +784,11 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwsub.vf v10, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v10, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
@@ -829,11 +835,11 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwsub.vf v12, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v12, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
@@ -880,11 +886,11 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwsub.vf v16, v8, fa0
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.vf v8, v16, fa0
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
index fdb48a6f10d3a..ab45345a7a39d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
@@ -1289,11 +1289,11 @@ entry:
define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfwsub.wv v12, v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
@@ -1308,11 +1308,11 @@ entry:
define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfwsub.wv v16, v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
@@ -1346,11 +1346,11 @@ entry:
define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwsub.wv v12, v10, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
@@ -1365,11 +1365,11 @@ entry:
define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfwsub.wv v16, v12, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
@@ -1384,11 +1384,11 @@ entry:
define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfwsub.wv v24, v16, v8
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwsub.wv v8, v16, v24
+; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll
index f25a27ca2b905..599a66d191fd2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll
@@ -641,8 +641,8 @@ define <vscale x 8 x half> @vsitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) strict
; CHECK-LABEL: vsitofp_nxv8i8_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 8 x half> @llvm.experimental.constrained.sitofp.nxv8f16.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 8 x half> %evec
@@ -653,8 +653,8 @@ define <vscale x 8 x half> @vuitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) strict
; CHECK-LABEL: vuitofp_nxv8i8_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 8 x half> @llvm.experimental.constrained.uitofp.nxv8f16.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 8 x half> %evec
@@ -713,8 +713,8 @@ define <vscale x 16 x half> @vsitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) st
; CHECK-LABEL: vsitofp_nxv16i8_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 16 x half> @llvm.experimental.constrained.sitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 16 x half> %evec
@@ -725,8 +725,8 @@ define <vscale x 16 x half> @vuitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) st
; CHECK-LABEL: vuitofp_nxv16i8_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 16 x half> @llvm.experimental.constrained.uitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 16 x half> %evec
@@ -761,8 +761,8 @@ define <vscale x 32 x half> @vsitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) st
; CHECK-LABEL: vsitofp_nxv32i8_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 32 x half> @llvm.experimental.constrained.sitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 32 x half> %evec
@@ -773,8 +773,8 @@ define <vscale x 32 x half> @vuitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) st
; CHECK-LABEL: vuitofp_nxv32i8_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 32 x half> @llvm.experimental.constrained.uitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 32 x half> %evec
@@ -947,8 +947,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) str
; CHECK-LABEL: vsitofp_nxv4i16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 4 x float> @llvm.experimental.constrained.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 4 x float> %evec
@@ -959,8 +959,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) str
; CHECK-LABEL: vuitofp_nxv4i16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 4 x float> @llvm.experimental.constrained.uitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 4 x float> %evec
@@ -1017,8 +1017,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) str
; CHECK-LABEL: vsitofp_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 8 x float> @llvm.experimental.constrained.sitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 8 x float> %evec
@@ -1029,8 +1029,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) str
; CHECK-LABEL: vuitofp_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 8 x float> @llvm.experimental.constrained.uitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 8 x float> %evec
@@ -1087,8 +1087,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
; CHECK-LABEL: vsitofp_nxv16i16_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 16 x float> @llvm.experimental.constrained.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 16 x float> %evec
@@ -1099,8 +1099,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
; CHECK-LABEL: vuitofp_nxv16i16_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 16 x float> @llvm.experimental.constrained.uitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 16 x float> %evec
@@ -1249,8 +1249,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) st
; CHECK-LABEL: vsitofp_nxv2i32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 2 x double> @llvm.experimental.constrained.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 2 x double> %evec
@@ -1261,8 +1261,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) st
; CHECK-LABEL: vuitofp_nxv2i32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = call <vscale x 2 x double> @llvm.experimental.constrained.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 2 x double> %evec
@@ -1319,8 +1319,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) st
; CHECK-LABEL: vsitofp_nxv4i32_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 4 x double> @llvm.experimental.constrained.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 4 x double> %evec
@@ -1331,8 +1331,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) st
; CHECK-LABEL: vuitofp_nxv4i32_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = call <vscale x 4 x double> @llvm.experimental.constrained.uitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 4 x double> %evec
@@ -1389,8 +1389,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) st
; CHECK-LABEL: vsitofp_nxv8i32_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 8 x double> @llvm.experimental.constrained.sitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 8 x double> %evec
@@ -1401,8 +1401,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) st
; CHECK-LABEL: vuitofp_nxv8i32_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
%evec = call <vscale x 8 x double> @llvm.experimental.constrained.uitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <vscale x 8 x double> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
index 4265663c1feee..95b1c35d48bb3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
@@ -339,11 +339,11 @@ define <vscale x 32 x bfloat> @vsitofp_nxv32i8_nxv32bf16(<vscale x 32 x i8> %va)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vsext.vf2 v16, v10
-; CHECK-NEXT: vfwcvt.f.x.v v24, v12
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT: vfwcvt.f.x.v v24, v16
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: vsext.vf2 v24, v10
+; CHECK-NEXT: vfwcvt.f.x.v v16, v12
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvt.f.x.v v16, v24
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: ret
%evec = sitofp <vscale x 32 x i8> %va to <vscale x 32 x bfloat>
ret <vscale x 32 x bfloat> %evec
@@ -354,11 +354,11 @@ define <vscale x 32 x bfloat> @vuitofp_nxv32i8_nxv32bf16(<vscale x 32 x i8> %va)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vzext.vf2 v16, v10
-; CHECK-NEXT: vfwcvt.f.xu.v v24, v12
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT: vfwcvt.f.xu.v v24, v16
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: vzext.vf2 v24, v10
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v24
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: ret
%evec = uitofp <vscale x 32 x i8> %va to <vscale x 32 x bfloat>
ret <vscale x 32 x bfloat> %evec
@@ -1496,8 +1496,8 @@ define <vscale x 8 x half> @vsitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) {
; ZVFH-LABEL: vsitofp_nxv8i8_nxv8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT: vfwcvt.f.x.v v10, v8
-; ZVFH-NEXT: vmv2r.v v8, v10
+; ZVFH-NEXT: vmv1r.v v10, v8
+; ZVFH-NEXT: vfwcvt.f.x.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vsitofp_nxv8i8_nxv8f16:
@@ -1515,8 +1515,8 @@ define <vscale x 8 x half> @vuitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) {
; ZVFH-LABEL: vuitofp_nxv8i8_nxv8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT: vfwcvt.f.xu.v v10, v8
-; ZVFH-NEXT: vmv2r.v v8, v10
+; ZVFH-NEXT: vmv1r.v v10, v8
+; ZVFH-NEXT: vfwcvt.f.xu.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vuitofp_nxv8i8_nxv8f16:
@@ -1578,8 +1578,8 @@ define <vscale x 16 x half> @vsitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) {
; ZVFH-LABEL: vsitofp_nxv16i8_nxv16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT: vfwcvt.f.x.v v12, v8
-; ZVFH-NEXT: vmv4r.v v8, v12
+; ZVFH-NEXT: vmv2r.v v12, v8
+; ZVFH-NEXT: vfwcvt.f.x.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vsitofp_nxv16i8_nxv16f16:
@@ -1597,8 +1597,8 @@ define <vscale x 16 x half> @vuitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) {
; ZVFH-LABEL: vuitofp_nxv16i8_nxv16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT: vfwcvt.f.xu.v v12, v8
-; ZVFH-NEXT: vmv4r.v v8, v12
+; ZVFH-NEXT: vmv2r.v v12, v8
+; ZVFH-NEXT: vfwcvt.f.xu.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vuitofp_nxv16i8_nxv16f16:
@@ -1638,19 +1638,19 @@ define <vscale x 32 x half> @vsitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) {
; ZVFH-LABEL: vsitofp_nxv32i8_nxv32f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT: vfwcvt.f.x.v v16, v8
-; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: vmv4r.v v16, v8
+; ZVFH-NEXT: vfwcvt.f.x.v v8, v16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vsitofp_nxv32i8_nxv32f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vsext.vf2 v12, v8
-; ZVFHMIN-NEXT: vsext.vf2 v16, v10
-; ZVFHMIN-NEXT: vfwcvt.f.x.v v24, v12
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.x.v v24, v16
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vsext.vf2 v24, v10
+; ZVFHMIN-NEXT: vfwcvt.f.x.v v16, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.x.v v16, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: ret
%evec = sitofp <vscale x 32 x i8> %va to <vscale x 32 x half>
ret <vscale x 32 x half> %evec
@@ -1660,19 +1660,19 @@ define <vscale x 32 x half> @vuitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) {
; ZVFH-LABEL: vuitofp_nxv32i8_nxv32f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT: vfwcvt.f.xu.v v16, v8
-; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: vmv4r.v v16, v8
+; ZVFH-NEXT: vfwcvt.f.xu.v v8, v16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vuitofp_nxv32i8_nxv32f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vzext.vf2 v12, v8
-; ZVFHMIN-NEXT: vzext.vf2 v16, v10
-; ZVFHMIN-NEXT: vfwcvt.f.xu.v v24, v12
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.xu.v v24, v16
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vzext.vf2 v24, v10
+; ZVFHMIN-NEXT: vfwcvt.f.xu.v v16, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.xu.v v16, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: ret
%evec = uitofp <vscale x 32 x i8> %va to <vscale x 32 x half>
ret <vscale x 32 x half> %evec
@@ -1872,8 +1872,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vsitofp_nxv4i16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = sitofp <vscale x 4 x i16> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
@@ -1883,8 +1883,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vuitofp_nxv4i16_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = uitofp <vscale x 4 x i16> %va to <vscale x 4 x float>
ret <vscale x 4 x float> %evec
@@ -1950,8 +1950,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vsitofp_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = sitofp <vscale x 8 x i16> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
@@ -1961,8 +1961,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vuitofp_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = uitofp <vscale x 8 x i16> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
@@ -2028,8 +2028,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
; CHECK-LABEL: vsitofp_nxv16i16_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
%evec = sitofp <vscale x 16 x i16> %va to <vscale x 16 x float>
ret <vscale x 16 x float> %evec
@@ -2039,8 +2039,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
; CHECK-LABEL: vuitofp_nxv16i16_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
%evec = uitofp <vscale x 16 x i16> %va to <vscale x 16 x float>
ret <vscale x 16 x float> %evec
@@ -2226,8 +2226,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vsitofp_nxv2i32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%evec = sitofp <vscale x 2 x i32> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
@@ -2237,8 +2237,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vuitofp_nxv2i32_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%evec = uitofp <vscale x 2 x i32> %va to <vscale x 2 x double>
ret <vscale x 2 x double> %evec
@@ -2306,8 +2306,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vsitofp_nxv4i32_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
; CHECK-NEXT: ret
%evec = sitofp <vscale x 4 x i32> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
@@ -2317,8 +2317,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vuitofp_nxv4i32_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
; CHECK-NEXT: ret
%evec = uitofp <vscale x 4 x i32> %va to <vscale x 4 x double>
ret <vscale x 4 x double> %evec
@@ -2386,8 +2386,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vsitofp_nxv8i32_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
; CHECK-NEXT: ret
%evec = sitofp <vscale x 8 x i32> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
@@ -2397,8 +2397,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vuitofp_nxv8i32_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
; CHECK-NEXT: ret
%evec = uitofp <vscale x 8 x i32> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 5cd9b77af82cf..10a92f0188a93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -5073,23 +5073,23 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) {
; NOVLOPT-LABEL: vfsqrt:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: fsrmi a0, 0
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfsqrt.v v10, v8
+; NOVLOPT-NEXT: vmv2r.v v12, v8
+; NOVLOPT-NEXT: fsrmi a0, 0
+; NOVLOPT-NEXT: vfsqrt.v v14, v8
; NOVLOPT-NEXT: fsrm a0
; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10
-; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfsqrt:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: fsrmi a0, 0
; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; VLOPT-NEXT: vfsqrt.v v10, v8
+; VLOPT-NEXT: vmv2r.v v12, v8
+; VLOPT-NEXT: fsrmi a0, 0
+; VLOPT-NEXT: vfsqrt.v v14, v8
; VLOPT-NEXT: fsrm a0
-; VLOPT-NEXT: vfwmacc.vv v12, v8, v10
-; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: vfwmacc.vv v8, v12, v14
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7)
%2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
@@ -5100,18 +5100,18 @@ define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) {
; NOVLOPT-LABEL: vfrsqrt7:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfrsqrt7.v v10, v8
+; NOVLOPT-NEXT: vmv2r.v v12, v8
+; NOVLOPT-NEXT: vfrsqrt7.v v14, v8
; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10
-; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfrsqrt7:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; VLOPT-NEXT: vfrsqrt7.v v10, v8
-; VLOPT-NEXT: vfwmacc.vv v12, v8, v10
-; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: vmv2r.v v12, v8
+; VLOPT-NEXT: vfrsqrt7.v v14, v8
+; VLOPT-NEXT: vfwmacc.vv v8, v12, v14
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7)
%2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
index 65eedbb0cc898..03b4268975d4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
@@ -777,8 +777,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxei32.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei32.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
@@ -825,8 +825,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxei32.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxei32.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
@@ -873,8 +873,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vloxei32.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vloxei32.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
@@ -1444,8 +1444,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxei32.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei32.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
@@ -1492,8 +1492,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxei32.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxei32.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
@@ -1540,8 +1540,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vloxei32.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vloxei32.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
@@ -2254,8 +2254,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
@@ -2302,8 +2302,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
@@ -2350,8 +2350,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(ptr %0
; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vloxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
@@ -2446,8 +2446,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16(
@@ -2494,8 +2494,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
@@ -2542,8 +2542,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vloxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
@@ -2968,8 +2968,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
@@ -3016,8 +3016,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
@@ -3064,8 +3064,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(ptr
; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vloxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
@@ -3160,8 +3160,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
@@ -3208,8 +3208,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
@@ -3256,8 +3256,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vloxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vloxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
@@ -3777,8 +3777,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
@@ -3825,8 +3825,8 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
@@ -3873,8 +3873,8 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vloxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
@@ -4017,8 +4017,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
@@ -4065,8 +4065,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
@@ -4113,8 +4113,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vloxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
@@ -4209,8 +4209,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8(
@@ -4257,8 +4257,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8(
@@ -4305,8 +4305,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vloxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
@@ -4497,8 +4497,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(ptr %0, <v
; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
@@ -4545,8 +4545,8 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(ptr %0
; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
@@ -4593,8 +4593,8 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(ptr %0
; CHECK-LABEL: intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vloxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
@@ -4977,8 +4977,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8(ptr %0, <
; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
@@ -5025,8 +5025,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(ptr %0, <
; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
@@ -5073,8 +5073,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(ptr %
; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vloxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
@@ -5169,8 +5169,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
@@ -5217,8 +5217,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
@@ -5265,8 +5265,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(ptr %0,
; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vloxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vloxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
index 240f377be1ce3..13c9f3304183e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
@@ -777,8 +777,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxei32.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei32.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
@@ -825,8 +825,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxei32.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxei32.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
@@ -873,8 +873,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vluxei32.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vluxei32.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
@@ -1684,8 +1684,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxei32.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei32.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
@@ -1732,8 +1732,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxei32.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxei32.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
@@ -1780,8 +1780,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vluxei32.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vluxei32.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
@@ -2494,8 +2494,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
@@ -2542,8 +2542,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
@@ -2590,8 +2590,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(ptr %0
; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vluxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
@@ -2686,8 +2686,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16(
@@ -2734,8 +2734,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
@@ -2782,8 +2782,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vluxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
@@ -3208,8 +3208,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
@@ -3256,8 +3256,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
@@ -3304,8 +3304,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(ptr
; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vluxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
@@ -3400,8 +3400,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxei16.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
@@ -3448,8 +3448,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxei16.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
@@ -3496,8 +3496,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vluxei16.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vluxei16.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
@@ -4017,8 +4017,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
@@ -4065,8 +4065,8 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
@@ -4113,8 +4113,8 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vluxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
@@ -4257,8 +4257,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
@@ -4305,8 +4305,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
@@ -4353,8 +4353,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vluxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
@@ -4449,8 +4449,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8(
@@ -4497,8 +4497,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8(
@@ -4545,8 +4545,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8(ptr %0, <vs
; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vluxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
@@ -4737,8 +4737,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(ptr %0, <v
; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
@@ -4785,8 +4785,8 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(ptr %0
; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
@@ -4833,8 +4833,8 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(ptr %0
; CHECK-LABEL: intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vluxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
@@ -4977,8 +4977,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8(ptr %0, <
; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
@@ -5025,8 +5025,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(ptr %0, <
; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
@@ -5073,8 +5073,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(ptr %
; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vluxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
@@ -5169,8 +5169,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxei8.v v10, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
@@ -5217,8 +5217,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxei8.v v12, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
@@ -5265,8 +5265,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(ptr %0,
; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vluxei8.v v16, (a0), v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vluxei8.v v8, (a0), v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll b/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll
index e206444e4ef22..df003907dc360 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll
@@ -7,8 +7,8 @@ define <vscale x 4 x ptr> @inttoptr_nxv4p0_nxv4i8(<vscale x 4 x i8> %va, <vscale
; CHECK-LABEL: inttoptr_nxv4p0_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf8 v12, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf8 v8, v12, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x ptr> @llvm.vp.inttoptr.nxv4p0.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x ptr> %v
@@ -20,8 +20,8 @@ define <vscale x 4 x ptr> @inttoptr_nxv4p0_nxv4i16(<vscale x 4 x i16> %va, <vsca
; CHECK-LABEL: inttoptr_nxv4p0_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf4 v8, v12, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x ptr> @llvm.vp.inttoptr.nxv4p0.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x ptr> %v
@@ -33,8 +33,8 @@ define <vscale x 4 x ptr> @inttoptr_nxv4p0_nxv4i32(<vscale x 4 x i32> %va, <vsca
; CHECK-LABEL: inttoptr_nxv4p0_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x ptr> @llvm.vp.inttoptr.nxv4p0.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x ptr> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 00d27dd583c8a..d6e1af59e6341 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -575,13 +575,13 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; RV32-NEXT: vslideup.vx v12, v8, a3
; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; RV32-NEXT: vwaddu.vv v14, v12, v9
-; RV32-NEXT: vwmaccu.vx v14, a2, v9
+; RV32-NEXT: vwaddu.vv v16, v12, v9
+; RV32-NEXT: vwmaccu.vx v16, a2, v9
; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma
-; RV32-NEXT: vwaddu.vv v16, v14, v10
-; RV32-NEXT: vwmaccu.vx v16, a2, v10
+; RV32-NEXT: vwaddu.vv v12, v16, v10
+; RV32-NEXT: vwmaccu.vx v12, a2, v10
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT: vse32.v v16, (a0)
+; RV32-NEXT: vse32.v v12, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: not_balanced_store_tree:
@@ -599,14 +599,14 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; RV64-NEXT: vslideup.vx v12, v8, a3
; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; RV64-NEXT: vwaddu.vv v14, v12, v9
-; RV64-NEXT: vwmaccu.vx v14, a2, v9
+; RV64-NEXT: vwaddu.vv v16, v12, v9
+; RV64-NEXT: vwmaccu.vx v16, a2, v9
; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma
-; RV64-NEXT: vwaddu.vv v16, v14, v10
-; RV64-NEXT: vwmaccu.vx v16, a2, v10
+; RV64-NEXT: vwaddu.vv v12, v16, v10
+; RV64-NEXT: vwmaccu.vx v12, a2, v10
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; RV64-NEXT: vse32.v v16, (a0)
+; RV64-NEXT: vse32.v v12, (a0)
; RV64-NEXT: ret
%rvl = mul i32 %evl, 4
%interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 68e7297605be2..b6ec7906885ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -936,8 +936,8 @@ define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x
; RV32-LABEL: vpgather_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i64:
@@ -955,8 +955,8 @@ define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x
; RV32-LABEL: vpgather_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4i64:
@@ -972,8 +972,8 @@ define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, i
; RV32-LABEL: vpgather_truemask_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4i64:
@@ -991,8 +991,8 @@ define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x
; RV32-LABEL: vpgather_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: vmv4r.v v16, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8i64:
@@ -1885,8 +1885,8 @@ define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale
; RV32-LABEL: vpgather_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f64:
@@ -1904,8 +1904,8 @@ define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale
; RV32-LABEL: vpgather_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f64:
@@ -1921,8 +1921,8 @@ define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs
; RV32-LABEL: vpgather_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vluxei32.v v12, (zero), v8
-; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: vmv2r.v v12, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f64:
@@ -1940,8 +1940,8 @@ define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale
; RV32-LABEL: vpgather_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: vmv4r.v v16, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv6f64:
@@ -2186,8 +2186,8 @@ define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale
; RV32-LABEL: vpgather_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: vmv4r.v v16, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 414c059345498..b9767638cdf82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -560,7 +560,6 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
@@ -568,22 +567,23 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: sub a4, a3, a1
-; CHECK-NEXT: vl8r.v v16, (a2)
+; CHECK-NEXT: vl8r.v v24, (a2)
; CHECK-NEXT: sltu a2, a3, a4
; CHECK-NEXT: vl8r.v v8, (a0)
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a4
; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
; CHECK-NEXT: bltu a3, a1, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
index cb925bfb0f237..25a226e60e715 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
@@ -1008,12 +1008,12 @@ define <vscale x 2 x i64> @vrol_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v10, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v12, 0
-; CHECK-RV32-NEXT: vwsub.vx v14, v12, a0
+; CHECK-RV32-NEXT: vmv.v.i v14, 0
+; CHECK-RV32-NEXT: vwsub.vx v12, v14, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vand.vx v12, v14, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
; CHECK-RV32-NEXT: vsll.vv v8, v8, v10
; CHECK-RV32-NEXT: vor.vv v8, v8, v12
@@ -1071,12 +1071,12 @@ define <vscale x 4 x i64> @vrol_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v12, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v16, 0
-; CHECK-RV32-NEXT: vwsub.vx v20, v16, a0
+; CHECK-RV32-NEXT: vmv.v.i v20, 0
+; CHECK-RV32-NEXT: vwsub.vx v16, v20, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vand.vx v16, v20, a0
+; CHECK-RV32-NEXT: vand.vx v16, v16, a0
; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16
; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
; CHECK-RV32-NEXT: vor.vv v8, v8, v16
@@ -1134,12 +1134,12 @@ define <vscale x 8 x i64> @vrol_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v16, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v24, 0
-; CHECK-RV32-NEXT: vwsub.vx v0, v24, a0
+; CHECK-RV32-NEXT: vmv.v.i v4, 0
+; CHECK-RV32-NEXT: vwsub.vx v24, v4, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vand.vx v16, v16, a0
-; CHECK-RV32-NEXT: vand.vx v24, v0, a0
+; CHECK-RV32-NEXT: vand.vx v24, v24, a0
; CHECK-RV32-NEXT: vsrl.vv v24, v8, v24
; CHECK-RV32-NEXT: vsll.vv v8, v8, v16
; CHECK-RV32-NEXT: vor.vv v8, v8, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
index 74743f9068a64..9e63b613ab70b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
@@ -1714,12 +1714,12 @@ define <vscale x 2 x i64> @vror_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v10, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v12, 0
-; CHECK-RV32-NEXT: vwsub.vx v14, v12, a0
+; CHECK-RV32-NEXT: vmv.v.i v14, 0
+; CHECK-RV32-NEXT: vwsub.vx v12, v14, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vand.vx v12, v14, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
; CHECK-RV32-NEXT: vor.vv v8, v8, v12
@@ -1816,12 +1816,12 @@ define <vscale x 4 x i64> @vror_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v12, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v16, 0
-; CHECK-RV32-NEXT: vwsub.vx v20, v16, a0
+; CHECK-RV32-NEXT: vmv.v.i v20, 0
+; CHECK-RV32-NEXT: vwsub.vx v16, v20, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vand.vx v16, v20, a0
+; CHECK-RV32-NEXT: vand.vx v16, v16, a0
; CHECK-RV32-NEXT: vsll.vv v16, v8, v16
; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
; CHECK-RV32-NEXT: vor.vv v8, v8, v16
@@ -1918,12 +1918,12 @@ define <vscale x 8 x i64> @vror_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b) {
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv.v.x v16, a0
; CHECK-RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v24, 0
-; CHECK-RV32-NEXT: vwsub.vx v0, v24, a0
+; CHECK-RV32-NEXT: vmv.v.i v4, 0
+; CHECK-RV32-NEXT: vwsub.vx v24, v4, a0
; CHECK-RV32-NEXT: li a0, 63
; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vand.vx v16, v16, a0
-; CHECK-RV32-NEXT: vand.vx v24, v0, a0
+; CHECK-RV32-NEXT: vand.vx v24, v24, a0
; CHECK-RV32-NEXT: vsll.vv v24, v8, v24
; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16
; CHECK-RV32-NEXT: vor.vv v8, v8, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index d2ef711fc0d74..206838917d004 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -107,32 +107,32 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i16i32_multiple_users(ptr %x, ptr %y
define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; NO_FOLDING: # %bb.0:
-; NO_FOLDING-NEXT: vl1re32.v v8, (a0)
-; NO_FOLDING-NEXT: vl1re32.v v9, (a1)
-; NO_FOLDING-NEXT: vl1re32.v v10, (a2)
+; NO_FOLDING-NEXT: vl1re32.v v10, (a0)
+; NO_FOLDING-NEXT: vl1re32.v v12, (a1)
+; NO_FOLDING-NEXT: vl1re32.v v14, (a2)
; NO_FOLDING-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; NO_FOLDING-NEXT: vsext.vf2 v12, v8
-; NO_FOLDING-NEXT: vsext.vf2 v14, v9
; NO_FOLDING-NEXT: vsext.vf2 v8, v10
-; NO_FOLDING-NEXT: vmul.vv v10, v12, v14
-; NO_FOLDING-NEXT: vadd.vv v14, v12, v8
-; NO_FOLDING-NEXT: vsub.vv v8, v12, v8
+; NO_FOLDING-NEXT: vsext.vf2 v10, v12
+; NO_FOLDING-NEXT: vsext.vf2 v12, v14
+; NO_FOLDING-NEXT: vmul.vv v10, v8, v10
+; NO_FOLDING-NEXT: vadd.vv v14, v8, v12
+; NO_FOLDING-NEXT: vsub.vv v8, v8, v12
; NO_FOLDING-NEXT: vor.vv v10, v10, v14
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; FOLDING: # %bb.0:
-; FOLDING-NEXT: vl1re32.v v8, (a0)
-; FOLDING-NEXT: vl1re32.v v9, (a1)
-; FOLDING-NEXT: vl1re32.v v10, (a2)
+; FOLDING-NEXT: vl1re32.v v14, (a0)
+; FOLDING-NEXT: vl1re32.v v10, (a1)
+; FOLDING-NEXT: vl1re32.v v15, (a2)
; FOLDING-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; FOLDING-NEXT: vwmul.vv v12, v8, v9
-; FOLDING-NEXT: vwadd.vv v14, v8, v10
-; FOLDING-NEXT: vwsub.vv v16, v8, v10
+; FOLDING-NEXT: vwmul.vv v8, v14, v10
+; FOLDING-NEXT: vwadd.vv v10, v14, v15
+; FOLDING-NEXT: vwsub.vv v12, v14, v15
; FOLDING-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; FOLDING-NEXT: vor.vv v8, v12, v14
-; FOLDING-NEXT: vor.vv v8, v8, v16
+; FOLDING-NEXT: vor.vv v8, v8, v10
+; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i32>, ptr %x
%b = load <vscale x 2 x i32>, ptr %y
@@ -398,32 +398,32 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i16i32_multiple_users(ptr %x, ptr %y
define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; NO_FOLDING: # %bb.0:
-; NO_FOLDING-NEXT: vl1re32.v v8, (a0)
-; NO_FOLDING-NEXT: vl1re32.v v9, (a1)
-; NO_FOLDING-NEXT: vl1re32.v v10, (a2)
+; NO_FOLDING-NEXT: vl1re32.v v10, (a0)
+; NO_FOLDING-NEXT: vl1re32.v v12, (a1)
+; NO_FOLDING-NEXT: vl1re32.v v14, (a2)
; NO_FOLDING-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; NO_FOLDING-NEXT: vzext.vf2 v12, v8
-; NO_FOLDING-NEXT: vzext.vf2 v14, v9
; NO_FOLDING-NEXT: vzext.vf2 v8, v10
-; NO_FOLDING-NEXT: vmul.vv v10, v12, v14
-; NO_FOLDING-NEXT: vadd.vv v14, v12, v8
-; NO_FOLDING-NEXT: vsub.vv v8, v12, v8
+; NO_FOLDING-NEXT: vzext.vf2 v10, v12
+; NO_FOLDING-NEXT: vzext.vf2 v12, v14
+; NO_FOLDING-NEXT: vmul.vv v10, v8, v10
+; NO_FOLDING-NEXT: vadd.vv v14, v8, v12
+; NO_FOLDING-NEXT: vsub.vv v8, v8, v12
; NO_FOLDING-NEXT: vor.vv v10, v10, v14
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; FOLDING: # %bb.0:
-; FOLDING-NEXT: vl1re32.v v8, (a0)
-; FOLDING-NEXT: vl1re32.v v9, (a1)
-; FOLDING-NEXT: vl1re32.v v10, (a2)
+; FOLDING-NEXT: vl1re32.v v14, (a0)
+; FOLDING-NEXT: vl1re32.v v10, (a1)
+; FOLDING-NEXT: vl1re32.v v15, (a2)
; FOLDING-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; FOLDING-NEXT: vwmulu.vv v12, v8, v9
-; FOLDING-NEXT: vwaddu.vv v14, v8, v10
-; FOLDING-NEXT: vwsubu.vv v16, v8, v10
+; FOLDING-NEXT: vwmulu.vv v8, v14, v10
+; FOLDING-NEXT: vwaddu.vv v10, v14, v15
+; FOLDING-NEXT: vwsubu.vv v12, v14, v15
; FOLDING-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; FOLDING-NEXT: vor.vv v8, v12, v14
-; FOLDING-NEXT: vor.vv v8, v8, v16
+; FOLDING-NEXT: vor.vv v8, v8, v10
+; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i32>, ptr %x
%b = load <vscale x 2 x i32>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll
index 3c91131fe4d12..eec2a5f3efcfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll
@@ -56,8 +56,8 @@ define <vscale x 2 x i64> @vsext_nxv2i8_nxv2i64(<vscale x 2 x i8> %a, <vscale x
; CHECK-LABEL: vsext_nxv2i8_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf8 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i64> %v
@@ -67,8 +67,8 @@ define <vscale x 2 x i64> @vsext_nxv2i8_nxv2i64_unmasked(<vscale x 2 x i8> %a, i
; CHECK-LABEL: vsext_nxv2i8_nxv2i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf8 v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x i64> %v
@@ -104,8 +104,8 @@ define <vscale x 2 x i64> @vsext_nxv2i16_nxv2i64(<vscale x 2 x i16> %a, <vscale
; CHECK-LABEL: vsext_nxv2i16_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i64> %v
@@ -115,8 +115,8 @@ define <vscale x 2 x i64> @vsext_nxv2i16_nxv2i64_unmasked(<vscale x 2 x i16> %a,
; CHECK-LABEL: vsext_nxv2i16_nxv2i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x i64> %v
@@ -128,8 +128,8 @@ define <vscale x 2 x i64> @vsext_nxv2i32_nxv2i64(<vscale x 2 x i32> %a, <vscale
; CHECK-LABEL: vsext_nxv2i32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i64> %v
@@ -139,8 +139,8 @@ define <vscale x 2 x i64> @vsext_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i32> %a,
; CHECK-LABEL: vsext_nxv2i32_nxv2i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext.ll b/llvm/test/CodeGen/RISCV/rvv/vsext.ll
index ba6783e8a6d39..bb83dbc98401a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext.ll
@@ -57,8 +57,8 @@ define <vscale x 2 x i64> @intrinsic_vsext_vf8_nxv2i64(<vscale x 2 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vsext_vf8_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf8 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i8(
@@ -101,8 +101,8 @@ define <vscale x 4 x i64> @intrinsic_vsext_vf8_nxv4i64(<vscale x 4 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vsext_vf8_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf8 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vsext.vf8 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i8(
@@ -145,8 +145,8 @@ define <vscale x 8 x i64> @intrinsic_vsext_vf8_nxv8i64(<vscale x 8 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vsext_vf8_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vsext.vf8 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i8(
@@ -233,8 +233,8 @@ define <vscale x 2 x i64> @intrinsic_vsext_vf4_nxv2i64(<vscale x 2 x i16> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf4_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i16(
@@ -277,8 +277,8 @@ define <vscale x 4 x i64> @intrinsic_vsext_vf4_nxv4i64(<vscale x 4 x i16> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf4_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vsext.vf4 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i16(
@@ -321,8 +321,8 @@ define <vscale x 8 x i64> @intrinsic_vsext_vf4_nxv8i64(<vscale x 8 x i16> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vsext.vf4 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i16(
@@ -453,8 +453,8 @@ define <vscale x 4 x i32> @intrinsic_vsext_vf4_nxv4i32(<vscale x 4 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vsext_vf4_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(
@@ -497,8 +497,8 @@ define <vscale x 8 x i32> @intrinsic_vsext_vf4_nxv8i32(<vscale x 8 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vsext.vf4 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i8(
@@ -541,8 +541,8 @@ define <vscale x 16 x i32> @intrinsic_vsext_vf4_nxv16i32(<vscale x 16 x i8> %0,
; CHECK-LABEL: intrinsic_vsext_vf4_nxv16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vsext.vf4 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i8(
@@ -629,8 +629,8 @@ define <vscale x 2 x i64> @intrinsic_vsext_vf2_nxv2i64(<vscale x 2 x i32> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i32(
@@ -673,8 +673,8 @@ define <vscale x 4 x i64> @intrinsic_vsext_vf2_nxv4i64(<vscale x 4 x i32> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vsext.vf2 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i32(
@@ -717,8 +717,8 @@ define <vscale x 8 x i64> @intrinsic_vsext_vf2_nxv8i64(<vscale x 8 x i32> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vsext.vf2 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i32(
@@ -849,8 +849,8 @@ define <vscale x 4 x i32> @intrinsic_vsext_vf2_nxv4i32(<vscale x 4 x i16> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(
@@ -893,8 +893,8 @@ define <vscale x 8 x i32> @intrinsic_vsext_vf2_nxv8i32(<vscale x 8 x i16> %0, iX
; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vsext.vf2 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i16(
@@ -937,8 +937,8 @@ define <vscale x 16 x i32> @intrinsic_vsext_vf2_nxv16i32(<vscale x 16 x i16> %0,
; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vsext.vf2 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i16(
@@ -1113,8 +1113,8 @@ define <vscale x 8 x i16> @intrinsic_vsext_vf2_nxv8i16(<vscale x 8 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vsext.nxv8i16.nxv8i8(
@@ -1157,8 +1157,8 @@ define <vscale x 16 x i16> @intrinsic_vsext_vf2_nxv16i16(<vscale x 16 x i8> %0,
; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vsext.vf2 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vsext.nxv16i16.nxv16i8(
@@ -1201,8 +1201,8 @@ define <vscale x 32 x i16> @intrinsic_vsext_vf2_nxv32i16(<vscale x 32 x i8> %0,
; CHECK-LABEL: intrinsic_vsext_vf2_nxv32i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vsext.vf2 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vsext.nxv32i16.nxv32i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll
index 045003600e64a..f69ae3d560ef7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll
@@ -462,8 +462,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i32(<vscale x 2 x i32> %va, <v
; CHECK-LABEL: vsitofp_nxv2f64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
@@ -473,8 +473,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i32_unmasked(<vscale x 2 x i32
; CHECK-LABEL: vsitofp_nxv2f64_nxv2i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.x.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x double> %v
@@ -508,8 +508,7 @@ define <vscale x 32 x half> @vsitofp_nxv32f16_nxv32i32(<vscale x 32 x i32> %va,
; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; ZVFH-NEXT: vmv1r.v v7, v0
-; ZVFH-NEXT: vmv8r.v v24, v16
+; ZVFH-NEXT: vmv1r.v v24, v0
; ZVFH-NEXT: csrr a1, vlenb
; ZVFH-NEXT: srli a2, a1, 2
; ZVFH-NEXT: slli a1, a1, 1
@@ -519,15 +518,15 @@ define <vscale x 32 x half> @vsitofp_nxv32f16_nxv32i32(<vscale x 32 x i32> %va,
; ZVFH-NEXT: addi a3, a3, -1
; ZVFH-NEXT: and a2, a3, a2
; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFH-NEXT: vfncvt.f.x.w v20, v24, v0.t
+; ZVFH-NEXT: vfncvt.f.x.w v28, v16, v0.t
; ZVFH-NEXT: bltu a0, a1, .LBB34_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a0, a1
; ZVFH-NEXT: .LBB34_2:
-; ZVFH-NEXT: vmv1r.v v0, v7
+; ZVFH-NEXT: vmv1r.v v0, v24
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vfncvt.f.x.w v16, v8, v0.t
-; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: vfncvt.f.x.w v24, v8, v0.t
+; ZVFH-NEXT: vmv8r.v v8, v24
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vsitofp_nxv32f16_nxv32i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
index 3bb56379e4613..6f9aea2d00981 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -298,7 +298,7 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: srli a5, a1, 3
; CHECK-NEXT: slli a4, a1, 3
; CHECK-NEXT: slli a3, a1, 1
@@ -312,20 +312,20 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: addi a7, a7, -1
; CHECK-NEXT: and a4, a7, a4
; CHECK-NEXT: srli a7, a1, 2
-; CHECK-NEXT: vl8re64.v v24, (a6)
-; CHECK-NEXT: vslidedown.vx v16, v0, a7
+; CHECK-NEXT: vl8re64.v v8, (a6)
+; CHECK-NEXT: vslidedown.vx v24, v0, a7
; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a5
-; CHECK-NEXT: vslidedown.vx v0, v16, a5
+; CHECK-NEXT: vslidedown.vx v0, v24, a5
; CHECK-NEXT: bltu a0, a1, .LBB17_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB17_2:
; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v20, v24, 0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vnsrl.wi v28, v8, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB17_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a3
@@ -336,9 +336,9 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: and a0, a3, a0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v24, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v12, v16, 0, v0.t
; CHECK-NEXT: bltu a2, a1, .LBB17_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a1
@@ -348,9 +348,10 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll
index c3120668389cd..e1edaaadadf1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll
@@ -454,8 +454,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i32(<vscale x 2 x i32> %va, <v
; CHECK-LABEL: vuitofp_nxv2f64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
@@ -465,8 +465,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i32_unmasked(<vscale x 2 x i32
; CHECK-LABEL: vuitofp_nxv2f64_nxv2i32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x double> %v
@@ -500,8 +500,7 @@ define <vscale x 32 x half> @vuitofp_nxv32f16_nxv32i32(<vscale x 32 x i32> %va,
; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; ZVFH-NEXT: vmv1r.v v7, v0
-; ZVFH-NEXT: vmv8r.v v24, v16
+; ZVFH-NEXT: vmv1r.v v24, v0
; ZVFH-NEXT: csrr a1, vlenb
; ZVFH-NEXT: srli a2, a1, 2
; ZVFH-NEXT: slli a1, a1, 1
@@ -511,15 +510,15 @@ define <vscale x 32 x half> @vuitofp_nxv32f16_nxv32i32(<vscale x 32 x i32> %va,
; ZVFH-NEXT: addi a3, a3, -1
; ZVFH-NEXT: and a2, a3, a2
; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFH-NEXT: vfncvt.f.xu.w v20, v24, v0.t
+; ZVFH-NEXT: vfncvt.f.xu.w v28, v16, v0.t
; ZVFH-NEXT: bltu a0, a1, .LBB34_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a0, a1
; ZVFH-NEXT: .LBB34_2:
-; ZVFH-NEXT: vmv1r.v v0, v7
+; ZVFH-NEXT: vmv1r.v v0, v24
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vfncvt.f.xu.w v16, v8, v0.t
-; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: vfncvt.f.xu.w v24, v8, v0.t
+; ZVFH-NEXT: vmv8r.v v8, v24
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
index 02af09f028fc1..747fb5a3e7ff8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
@@ -5,12 +5,13 @@
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwadd_wv_mask_v8i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwadd.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwadd.wv v8, v8, v24, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -22,12 +23,13 @@ define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
define <vscale x 8 x i64> @vwaddu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwaddu_wv_mask_v8i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwaddu.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwaddu.wv v8, v8, v24, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -39,13 +41,13 @@ define <vscale x 8 x i64> @vwaddu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
define <vscale x 8 x i64> @vwaddu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
; CHECK-LABEL: vwaddu_vv_mask_v8i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vvm v20, v12, v8, v0
+; CHECK-NEXT: vwaddu.vv v8, v20, v16
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -58,12 +60,13 @@ define <vscale x 8 x i64> @vwaddu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32_commutative(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwadd_wv_mask_v8i32_commutative:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwadd.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwadd.wv v8, v8, v24, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index ddc27f7562cdb..3f5d42f89337b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -110,8 +110,9 @@ define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vsc
; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwadd.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwadd.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -123,8 +124,9 @@ define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vs
; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -136,8 +138,8 @@ define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-LABEL: vwadd_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwadd.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwadd.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -151,8 +153,8 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-LABEL: vwaddu_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwaddu.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -214,8 +216,9 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vsc
; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwadd.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwadd.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -227,8 +230,9 @@ define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vs
; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -240,8 +244,8 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
; CHECK-LABEL: vwadd_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwadd.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwadd.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -255,8 +259,8 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwaddu.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -318,8 +322,9 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vsc
; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwadd.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -331,8 +336,9 @@ define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vs
; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwaddu.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -344,8 +350,8 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
; CHECK-LABEL: vwadd_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwadd.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwadd.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -359,8 +365,8 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwaddu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwaddu.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -744,10 +750,10 @@ define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
; CHECK-LABEL: vwadd_wx_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v14, v12
-; CHECK-NEXT: vwadd.wv v8, v8, v14
+; CHECK-NEXT: vsext.vf2 v12, v14
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -760,10 +766,10 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
; CHECK-LABEL: vwaddu_wx_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v14, v12
-; CHECK-NEXT: vwaddu.wv v8, v8, v14
+; CHECK-NEXT: vzext.vf2 v12, v14
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -862,10 +868,10 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v20, v16
-; CHECK-NEXT: vwadd.wv v8, v8, v20
+; CHECK-NEXT: vsext.vf2 v16, v20
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -878,10 +884,10 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v20, v16
-; CHECK-NEXT: vwaddu.wv v8, v8, v20
+; CHECK-NEXT: vzext.vf2 v16, v20
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -1216,10 +1222,10 @@ define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b
; CHECK-LABEL: vwadd_wx_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v14, v12
-; CHECK-NEXT: vwadd.wv v8, v8, v14
+; CHECK-NEXT: vsext.vf4 v12, v14
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1232,10 +1238,10 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
; CHECK-LABEL: vwaddu_wx_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v14, v12
-; CHECK-NEXT: vwaddu.wv v8, v8, v14
+; CHECK-NEXT: vzext.vf4 v12, v14
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1334,10 +1340,10 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b
; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v20, v16
-; CHECK-NEXT: vwadd.wv v8, v8, v20
+; CHECK-NEXT: vsext.vf4 v16, v20
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1350,10 +1356,10 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v20, v16
-; CHECK-NEXT: vwaddu.wv v8, v8, v20
+; CHECK-NEXT: vzext.vf4 v16, v20
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1484,8 +1490,8 @@ define <vscale x 8 x i64> @vwadd_vx_splat_zext(<vscale x 8 x i32> %va, i32 %b) {
; RV64-LABEL: vwadd_vx_splat_zext:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vwaddu.vx v16, v8, a0
-; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: vmv4r.v v16, v8
+; RV64-NEXT: vwaddu.vx v8, v16, a0
; RV64-NEXT: ret
%zb = zext i32 %b to i64
%head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
@@ -1559,8 +1565,8 @@ define <vscale x 8 x i64> @vwadd_vx_splat_sext(<vscale x 8 x i32> %va, i32 %b) {
; CHECK-LABEL: vwadd_vx_splat_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwadd.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwadd.vx v8, v16, a0
; CHECK-NEXT: ret
%sb = sext i32 %b to i64
%head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.ll
index c7df0378b3e34..a0ff37b600043 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwadd_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwadd_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwadd.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwadd.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwadd_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwadd_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwadd.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwadd.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwadd_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwadd_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwadd.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwadd_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwadd_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwadd.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwadd.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwadd_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwadd_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwadd.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwadd.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwadd_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
; CHECK-LABEL: intrinsic_vwadd_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwadd.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwadd.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwadd_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwadd_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwadd.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwadd.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwadd_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwadd_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwadd.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwadd.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwadd_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwadd_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwadd.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwadd_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i8
; CHECK-LABEL: intrinsic_vwadd_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwadd.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwadd.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwadd_vx_nxv16i16_nxv16i8_i8(<vscale x 16
; CHECK-LABEL: intrinsic_vwadd_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwadd.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwadd.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwadd_vx_nxv32i16_nxv32i8_i8(<vscale x 32
; CHECK-LABEL: intrinsic_vwadd_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwadd.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwadd.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwadd_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwadd_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwadd.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwadd.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwadd_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwadd_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwadd.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwadd.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwadd_vx_nxv16i32_nxv16i16_i16(<vscale x 1
; CHECK-LABEL: intrinsic_vwadd_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwadd.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwadd.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwadd.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwadd_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
; CHECK-LABEL: intrinsic_vwadd_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwadd.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwadd.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwadd_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwadd_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwadd.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwadd.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwadd_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwadd_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwadd.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwadd.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll
index b1309f5291877..3850261e2117a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll
@@ -1986,8 +1986,8 @@ define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vs
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwadd.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwadd.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
@@ -2003,8 +2003,8 @@ define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwadd.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwadd.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
@@ -2020,8 +2020,8 @@ define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwadd.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwadd.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
@@ -2071,8 +2071,8 @@ define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<v
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwadd.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwadd.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
@@ -2088,8 +2088,8 @@ define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<v
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwadd.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwadd.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
@@ -2122,8 +2122,8 @@ define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<v
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwadd.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwadd.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
@@ -2139,8 +2139,8 @@ define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<v
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwadd.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwadd.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
@@ -2156,8 +2156,8 @@ define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<v
; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwadd.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwadd.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll
index 46932a13d22f0..4f82844c0edfc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwaddu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwaddu_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwaddu_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwaddu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwaddu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwaddu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwaddu_vv_nxv16i32_nxv16i16_nxv16i16(<vsca
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwaddu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwaddu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwaddu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwaddu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwaddu_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwaddu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwaddu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwaddu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwaddu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwaddu_vx_nxv16i16_nxv16i8_i8(<vscale x 16
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwaddu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwaddu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwaddu_vx_nxv32i16_nxv32i8_i8(<vscale x 32
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwaddu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwaddu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwaddu_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwaddu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwaddu_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwaddu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwaddu_vx_nxv16i32_nxv16i16_i16(<vscale x
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwaddu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwaddu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwaddu_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwaddu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwaddu_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwaddu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwaddu_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwaddu_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwaddu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwaddu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll
index 5a9a072129d31..8af403063f80a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll
@@ -1982,8 +1982,8 @@ define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<v
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwaddu.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwaddu.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
@@ -1999,8 +1999,8 @@ define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwaddu.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwaddu.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
@@ -2016,8 +2016,8 @@ define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwaddu.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwaddu.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
@@ -2067,8 +2067,8 @@ define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwaddu.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwaddu.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
@@ -2084,8 +2084,8 @@ define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwaddu.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwaddu.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
@@ -2118,8 +2118,8 @@ define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwaddu.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwaddu.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
@@ -2135,8 +2135,8 @@ define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwaddu.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwaddu.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
@@ -2152,8 +2152,8 @@ define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<
; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwaddu.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwaddu.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
index 28fc53f37ba1d..b55a7f610aad5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -90,8 +90,9 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vsc
; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmul.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmul.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -103,8 +104,9 @@ define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vs
; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulu.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -116,8 +118,9 @@ define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <v
; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulsu.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -129,8 +132,8 @@ define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-LABEL: vwmul_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmul.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmul.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -144,8 +147,8 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -159,8 +162,8 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i3
; CHECK-LABEL: vwmulsu_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulsu.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -174,8 +177,9 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vsc
; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwmul.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmul.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -187,8 +191,9 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vs
; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwmulu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulu.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -200,8 +205,9 @@ define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <v
; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulsu.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -213,8 +219,8 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
; CHECK-LABEL: vwmul_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwmul.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmul.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -228,8 +234,8 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwmulu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulu.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -243,8 +249,8 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i3
; CHECK-LABEL: vwmulsu_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulsu.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -258,8 +264,9 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vsc
; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwmul.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmul.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -271,8 +278,9 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vs
; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwmulu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulu.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -284,8 +292,9 @@ define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <v
; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulsu.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -297,8 +306,8 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
; CHECK-LABEL: vwmul_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwmul.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmul.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -312,8 +321,8 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwmulu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulu.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -327,8 +336,8 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i3
; CHECK-LABEL: vwmulsu_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulsu.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul.ll
index f88765dc1e252..3c8b6c8c4b561 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwmul_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwmul_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwmul.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmul.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwmul.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwmul_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwmul_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwmul.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmul.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwmul.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwmul_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwmul_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwmul.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmul.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwmul.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwmul_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwmul_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwmul.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmul.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwmul_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwmul_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwmul.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmul.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwmul.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwmul_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
; CHECK-LABEL: intrinsic_vwmul_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwmul.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmul.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwmul.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwmul_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwmul_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwmul.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmul.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwmul.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwmul_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwmul_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwmul.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmul.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwmul_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwmul_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwmul.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmul.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwmul.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwmul_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i8
; CHECK-LABEL: intrinsic_vwmul_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwmul.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmul.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwmul.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwmul_vx_nxv16i16_nxv16i8_i8(<vscale x 16
; CHECK-LABEL: intrinsic_vwmul_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwmul.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmul.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwmul.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwmul_vx_nxv32i16_nxv32i8_i8(<vscale x 32
; CHECK-LABEL: intrinsic_vwmul_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwmul.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmul.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwmul.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwmul_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwmul_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwmul.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmul.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwmul_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwmul_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwmul.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmul.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwmul.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwmul_vx_nxv16i32_nxv16i16_i16(<vscale x 1
; CHECK-LABEL: intrinsic_vwmul_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwmul.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmul.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwmul.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwmul_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
; CHECK-LABEL: intrinsic_vwmul_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwmul.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmul.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwmul.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwmul_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwmul_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwmul.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmul.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwmul_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwmul_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwmul.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmul.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwmul.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll
index a44be068b0a65..fa0f935513493 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwmulsu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulsu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwmulsu.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwmulsu_vv_nxv16i16_nxv16i8_nxv16i8(<vscal
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulsu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwmulsu.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwmulsu_vv_nxv32i16_nxv32i8_nxv32i8(<vscal
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulsu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwmulsu.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwmulsu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulsu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwmulsu.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwmulsu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulsu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwmulsu.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwmulsu_vv_nxv16i32_nxv16i16_nxv16i16(<vsc
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulsu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwmulsu.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwmulsu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulsu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwmulsu.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwmulsu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulsu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwmulsu.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwmulsu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulsu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwmulsu.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwmulsu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulsu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwmulsu.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwmulsu_vx_nxv16i16_nxv16i8_i8(<vscale x 1
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulsu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwmulsu.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwmulsu_vx_nxv32i16_nxv32i8_i8(<vscale x 3
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulsu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwmulsu.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwmulsu_vx_nxv4i32_nxv4i16_i16(<vscale x 4
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulsu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwmulsu.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwmulsu_vx_nxv8i32_nxv8i16_i16(<vscale x 8
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulsu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwmulsu.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwmulsu_vx_nxv16i32_nxv16i16_i16(<vscale x
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulsu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwmulsu.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwmulsu_vx_nxv2i64_nxv2i32_i32(<vscale x 2
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwmulsu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulsu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwmulsu.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwmulsu_vx_nxv4i64_nxv4i32_i32(<vscale x 4
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwmulsu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulsu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwmulsu.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwmulsu_vx_nxv8i64_nxv8i32_i32(<vscale x 8
; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwmulsu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulsu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwmulsu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll
index c1281ec4d1be2..8c9f245921b66 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwmulu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwmulu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwmulu.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwmulu_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwmulu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwmulu.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwmulu_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwmulu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwmulu.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwmulu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwmulu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwmulu.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwmulu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwmulu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwmulu.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwmulu_vv_nxv16i32_nxv16i16_nxv16i16(<vsca
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwmulu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwmulu.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwmulu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwmulu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwmulu.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwmulu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwmulu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwmulu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwmulu.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwmulu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwmulu_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwmulu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwmulu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwmulu.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwmulu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwmulu.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwmulu_vx_nxv16i16_nxv16i8_i8(<vscale x 16
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwmulu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwmulu.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwmulu_vx_nxv32i16_nxv32i8_i8(<vscale x 32
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwmulu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwmulu.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwmulu_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwmulu.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwmulu_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwmulu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwmulu.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwmulu_vx_nxv16i32_nxv16i16_i16(<vscale x
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwmulu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwmulu.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwmulu_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwmulu.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwmulu_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwmulu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwmulu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwmulu.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwmulu_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwmulu_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwmulu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwmulu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwmulu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index fd09fe791b4fd..381f57e59aa76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -20,8 +20,9 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%y = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -41,8 +42,9 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%y = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -61,8 +63,8 @@ define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv2i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -75,18 +77,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32
; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -100,18 +102,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32
; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -125,18 +127,18 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,18 +152,18 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -175,18 +177,18 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -200,18 +202,18 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -224,17 +226,17 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a) {
; CHECK-LABEL: vwsll_vi_nxv2i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%z = shl <vscale x 2 x i64> %x, splat (i64 2)
@@ -257,8 +259,9 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%y = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -278,8 +281,9 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%y = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -298,8 +302,8 @@ define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv4i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -320,8 +324,8 @@ define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv4i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -334,18 +338,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -359,18 +363,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -384,18 +388,18 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -409,18 +413,18 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -433,17 +437,17 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a) {
; CHECK-LABEL: vwsll_vi_nxv4i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%z = shl <vscale x 4 x i32> %x, splat (i32 2)
@@ -466,8 +470,9 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x
; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%y = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -487,8 +492,9 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x
; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%y = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -507,8 +513,8 @@ define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -529,8 +535,8 @@ define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -551,8 +557,8 @@ define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -565,18 +571,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b)
; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -590,18 +596,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b)
; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -614,17 +620,17 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b)
define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a) {
; CHECK-LABEL: vwsll_vi_nxv8i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vwmulu.vx v8, v10, a0
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%z = shl <vscale x 8 x i16> %x, splat (i16 2)
@@ -704,21 +710,21 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a
; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsext.vf2 v8, v12
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -732,11 +738,11 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
@@ -757,21 +763,21 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsext.vf4 v8, v12
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -785,11 +791,11 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
@@ -810,21 +816,21 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a,
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vsext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vsext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsext.vf8 v8, v12
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -838,11 +844,11 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a,
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vzext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: vzext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
index 1358a7c69cb8a..87bb5fa0238ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
@@ -22,8 +22,9 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%y = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -43,8 +44,9 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%y = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -70,15 +72,15 @@ define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <
; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv2i64:
; CHECK-ZVBB32: # %bb.0:
; CHECK-ZVBB32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB32-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB32-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB32-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB32-NEXT: ret
;
; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv2i64:
; CHECK-ZVBB64: # %bb.0:
; CHECK-ZVBB64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB64-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB64-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB64-NEXT: ret
%head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -91,18 +93,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32
; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -116,18 +118,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32
; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -141,18 +143,18 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -166,18 +168,18 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -191,18 +193,18 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vsext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -216,18 +218,18 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf8 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vzext.vf8 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -240,17 +242,17 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vwsll_vi_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vwmulu.vx v8, v10, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> splat (i64 2), <vscale x 2 x i1> %m, i32 %vl)
@@ -275,8 +277,9 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%y = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -296,8 +299,9 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale
; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%y = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -323,15 +327,15 @@ define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <
; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv4i32:
; CHECK-ZVBB32: # %bb.0:
; CHECK-ZVBB32-NEXT: vsetvli zero, a2, e16, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB32-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB32-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB32-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB32-NEXT: ret
;
; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv4i32:
; CHECK-ZVBB64: # %bb.0:
; CHECK-ZVBB64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB64-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB64-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB64-NEXT: ret
%head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -352,8 +356,8 @@ define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv4i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -366,18 +370,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -391,18 +395,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16
; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -416,18 +420,18 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vsext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -441,18 +445,18 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vzext.vf4 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -465,17 +469,17 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a, <vscale x 4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vwsll_vi_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vwmulu.vx v8, v10, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> splat (i32 2), <vscale x 4 x i1> %m, i32 %vl)
@@ -501,8 +505,9 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x
; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%y = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -522,8 +527,9 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x
; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v9
+; CHECK-ZVBB-NEXT: vmv1r.v v11, v8
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v11, v10, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%y = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -549,15 +555,15 @@ define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <v
; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv8i16:
; CHECK-ZVBB32: # %bb.0:
; CHECK-ZVBB32-NEXT: vsetvli zero, a2, e8, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB32-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB32-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB32-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB32-NEXT: ret
;
; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv8i16:
; CHECK-ZVBB64: # %bb.0:
; CHECK-ZVBB64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB64-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB64-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB64-NEXT: ret
%head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -578,8 +584,8 @@ define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <v
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -600,8 +606,8 @@ define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <v
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -614,18 +620,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b,
; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vsext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_sext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -639,18 +645,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b,
; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v12, v9
-; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vzext.vf2 v8, v12
+; CHECK-NEXT: vsll.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0, v0.t
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -663,17 +669,17 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b,
define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a, <vscale x 8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vwsll_vi_nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwmulu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vwmulu.vx v8, v10, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2, v0.t
-; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT: vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vi v8, v10, 2, v0.t
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> splat (i16 2), <vscale x 8 x i1> %m, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll.ll
index 9711afb2aafbf..14af93408d3d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwsll_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwsll_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwsll.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsll.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsll.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwsll_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwsll_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwsll.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsll.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsll.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwsll_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwsll_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwsll.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsll.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsll.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwsll_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwsll_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwsll.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsll.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwsll_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwsll_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwsll.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsll.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsll.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwsll_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
; CHECK-LABEL: intrinsic_vwsll_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwsll.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsll.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsll.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwsll_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwsll_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwsll.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsll.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsll.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwsll_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwsll_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwsll.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsll.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsll.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwsll_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwsll_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwsll.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsll.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsll.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwsll_vx_nxv8i16_nxv8i8(<vscale x 8 x i8> %
; CHECK-LABEL: intrinsic_vwsll_vx_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwsll.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsll.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsll.nxv8i16.nxv8i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwsll_vx_nxv16i16_nxv16i8(<vscale x 16 x i
; CHECK-LABEL: intrinsic_vwsll_vx_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwsll.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsll.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsll.nxv16i16.nxv16i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwsll_vx_nxv32i16_nxv32i8(<vscale x 32 x i
; CHECK-LABEL: intrinsic_vwsll_vx_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwsll.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsll.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsll.nxv32i16.nxv32i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwsll_vx_nxv4i32_nxv4i16(<vscale x 4 x i16>
; CHECK-LABEL: intrinsic_vwsll_vx_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwsll.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsll.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwsll_vx_nxv8i32_nxv8i16(<vscale x 8 x i16>
; CHECK-LABEL: intrinsic_vwsll_vx_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwsll.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsll.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsll.nxv8i32.nxv8i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwsll_vx_nxv16i32_nxv16i16(<vscale x 16 x
; CHECK-LABEL: intrinsic_vwsll_vx_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwsll.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsll.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsll.nxv16i32.nxv16i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwsll_vx_nxv2i64_nxv2i32(<vscale x 2 x i32>
; CHECK-LABEL: intrinsic_vwsll_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwsll.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsll.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsll.nxv2i64.nxv2i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwsll_vx_nxv4i64_nxv4i32(<vscale x 4 x i32>
; CHECK-LABEL: intrinsic_vwsll_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwsll.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsll.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsll.nxv4i64.nxv4i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwsll_vx_nxv8i64_nxv8i32(<vscale x 8 x i32>
; CHECK-LABEL: intrinsic_vwsll_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwsll.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsll.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsll.nxv8i64.nxv8i32(
@@ -1550,8 +1559,8 @@ define <vscale x 8 x i16> @intrinsic_vwsll_vi_nxv8i16_nxv8i8(<vscale x 8 x i8> %
; CHECK-LABEL: intrinsic_vwsll_vi_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwsll.vi v10, v8, 1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsll.vi v8, v10, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsll.nxv8i16.nxv8i8(
@@ -1584,8 +1593,8 @@ define <vscale x 16 x i16> @intrinsic_vwsll_vi_nxv16i16_nxv16i8(<vscale x 16 x i
; CHECK-LABEL: intrinsic_vwsll_vi_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwsll.vi v12, v8, 1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsll.vi v8, v12, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsll.nxv16i16.nxv16i8(
@@ -1618,8 +1627,8 @@ define <vscale x 32 x i16> @intrinsic_vwsll_vi_nxv32i16_nxv32i8(<vscale x 32 x i
; CHECK-LABEL: intrinsic_vwsll_vi_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwsll.vi v16, v8, 1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsll.vi v8, v16, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsll.nxv32i16.nxv32i8(
@@ -1720,8 +1729,8 @@ define <vscale x 4 x i32> @intrinsic_vwsll_vi_nxv4i32_nxv4i16(<vscale x 4 x i16>
; CHECK-LABEL: intrinsic_vwsll_vi_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwsll.vi v10, v8, 1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsll.vi v8, v10, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(
@@ -1754,8 +1763,8 @@ define <vscale x 8 x i32> @intrinsic_vwsll_vi_nxv8i32_nxv8i16(<vscale x 8 x i16>
; CHECK-LABEL: intrinsic_vwsll_vi_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwsll.vi v12, v8, 1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsll.vi v8, v12, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsll.nxv8i32.nxv8i16(
@@ -1788,8 +1797,8 @@ define <vscale x 16 x i32> @intrinsic_vwsll_vi_nxv16i32_nxv16i16(<vscale x 16 x
; CHECK-LABEL: intrinsic_vwsll_vi_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwsll.vi v16, v8, 1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsll.vi v8, v16, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsll.nxv16i32.nxv16i16(
@@ -1856,8 +1865,8 @@ define <vscale x 2 x i64> @intrinsic_vwsll_vi_nxv2i64_nxv2i32(<vscale x 2 x i32>
; CHECK-LABEL: intrinsic_vwsll_vi_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwsll.vi v10, v8, 1
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsll.vi v8, v10, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsll.nxv2i64.nxv2i32(
@@ -1890,8 +1899,8 @@ define <vscale x 4 x i64> @intrinsic_vwsll_vi_nxv4i64_nxv4i32(<vscale x 4 x i32>
; CHECK-LABEL: intrinsic_vwsll_vi_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwsll.vi v12, v8, 1
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsll.vi v8, v12, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsll.nxv4i64.nxv4i32(
@@ -1924,8 +1933,8 @@ define <vscale x 8 x i64> @intrinsic_vwsll_vi_nxv8i64_nxv8i32(<vscale x 8 x i32>
; CHECK-LABEL: intrinsic_vwsll_vi_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwsll.vi v16, v8, 1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsll.vi v8, v16, 1
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsll.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
index 04ece9d94880c..22f9f97373415 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -5,12 +5,13 @@
define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwsub.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwsub.wv v8, v8, v24, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -22,12 +23,13 @@ define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwsubu.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwsubu.wv v8, v8, v24, v0.t
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -39,13 +41,13 @@ define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
; CHECK-LABEL: vwsubu_vv_mask_v8i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vwsubu.vv v16, v12, v8
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vvm v20, v12, v8, v0
+; CHECK-NEXT: vwsubu.vv v8, v16, v20
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index 852814d648bfc..631c8c3ca48f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -110,8 +110,9 @@ define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vsc
; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsub.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -123,8 +124,9 @@ define <vscale x 2 x i64> @vwsubu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vs
; CHECK-LABEL: vwsubu_vv_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -136,8 +138,8 @@ define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-LABEL: vwsub_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsub.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsub.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -151,8 +153,8 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-LABEL: vwsubu_vx_nxv2i64_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsubu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsubu.vx v8, v10, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -214,8 +216,9 @@ define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vsc
; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsub.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -227,8 +230,9 @@ define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vs
; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsubu.vv v8, v14, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -240,8 +244,8 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
; CHECK-LABEL: vwsub_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwsub.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsub.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -255,8 +259,8 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwsubu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsubu.vx v8, v12, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -318,8 +322,9 @@ define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vsc
; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwsub.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsub.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -331,8 +336,9 @@ define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vs
; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwsubu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsubu.vv v8, v20, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -344,8 +350,8 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
; CHECK-LABEL: vwsub_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwsub.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsub.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -359,8 +365,8 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vwsubu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsubu.vx v8, v16, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -744,10 +750,10 @@ define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
; CHECK-LABEL: vwsub_wx_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v14, v12
-; CHECK-NEXT: vwsub.wv v8, v8, v14
+; CHECK-NEXT: vsext.vf2 v12, v14
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -760,10 +766,10 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
; CHECK-LABEL: vwsubu_wx_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v14, v12
-; CHECK-NEXT: vwsubu.wv v8, v8, v14
+; CHECK-NEXT: vzext.vf2 v12, v14
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -862,10 +868,10 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v20, v16
-; CHECK-NEXT: vwsub.wv v8, v8, v20
+; CHECK-NEXT: vsext.vf2 v16, v20
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -878,10 +884,10 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v20, v16
-; CHECK-NEXT: vwsubu.wv v8, v8, v20
+; CHECK-NEXT: vzext.vf2 v16, v20
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -1216,10 +1222,10 @@ define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b
; CHECK-LABEL: vwsub_wx_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v14, v12
-; CHECK-NEXT: vwsub.wv v8, v8, v14
+; CHECK-NEXT: vsext.vf4 v12, v14
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1232,10 +1238,10 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
; CHECK-LABEL: vwsubu_wx_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vmv.v.x v14, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v14, v12
-; CHECK-NEXT: vwsubu.wv v8, v8, v14
+; CHECK-NEXT: vzext.vf4 v12, v14
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1334,10 +1340,10 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b
; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v20, v16
-; CHECK-NEXT: vwsub.wv v8, v8, v20
+; CHECK-NEXT: vsext.vf4 v16, v20
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1350,10 +1356,10 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vmv.v.x v20, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v20, v16
-; CHECK-NEXT: vwsubu.wv v8, v8, v20
+; CHECK-NEXT: vzext.vf4 v16, v20
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.ll
index c22d86cdef1d7..24e03df413bfd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwsub_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwsub_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsub.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwsub_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwsub_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsub.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwsub_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwsub_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwsub.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsub.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwsub_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwsub_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsub.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwsub_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwsub_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsub.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwsub_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
; CHECK-LABEL: intrinsic_vwsub_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwsub.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsub.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsub.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwsub_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwsub_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsub.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwsub_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwsub_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsub.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwsub_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwsub_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwsub.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsub.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwsub_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i8
; CHECK-LABEL: intrinsic_vwsub_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwsub.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsub.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwsub_vx_nxv16i16_nxv16i8_i8(<vscale x 16
; CHECK-LABEL: intrinsic_vwsub_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwsub.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsub.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwsub_vx_nxv32i16_nxv32i8_i8(<vscale x 32
; CHECK-LABEL: intrinsic_vwsub_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwsub.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsub.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwsub_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwsub_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwsub.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsub.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwsub_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwsub_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwsub.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsub.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwsub_vx_nxv16i32_nxv16i16_i16(<vscale x 1
; CHECK-LABEL: intrinsic_vwsub_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwsub.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsub.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsub.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwsub_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
; CHECK-LABEL: intrinsic_vwsub_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwsub.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsub.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwsub_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwsub_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwsub.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsub.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwsub_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwsub_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwsub.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsub.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll
index 08ed452776f95..1bc6103d298b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll
@@ -1982,8 +1982,8 @@ define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vs
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwsub.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwsub.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
@@ -1999,8 +1999,8 @@ define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwsub.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwsub.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
@@ -2016,8 +2016,8 @@ define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwsub.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwsub.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
@@ -2067,8 +2067,8 @@ define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<v
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwsub.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwsub.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
@@ -2084,8 +2084,8 @@ define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<v
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwsub.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwsub.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
@@ -2118,8 +2118,8 @@ define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<v
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwsub.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwsub.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
@@ -2135,8 +2135,8 @@ define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<v
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwsub.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwsub.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
@@ -2152,8 +2152,8 @@ define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<v
; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwsub.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwsub.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll
index 858526d3f9b34..d849b0aaffc63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll
@@ -158,8 +158,9 @@ define <vscale x 8 x i16> @intrinsic_vwsubu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv8i16_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.nxv8i16.nxv8i8.nxv8i8(
@@ -206,8 +207,9 @@ define <vscale x 16 x i16> @intrinsic_vwsubu_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv16i16_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsubu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.nxv16i16.nxv16i8.nxv16i8(
@@ -254,8 +256,9 @@ define <vscale x 32 x i16> @intrinsic_vwsubu_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv32i16_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwsubu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsubu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.nxv32i16.nxv32i8.nxv32i8(
@@ -398,8 +401,9 @@ define <vscale x 4 x i32> @intrinsic_vwsubu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv4i32_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.nxv4i32.nxv4i16.nxv4i16(
@@ -446,8 +450,9 @@ define <vscale x 8 x i32> @intrinsic_vwsubu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv8i32_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsubu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.nxv8i32.nxv8i16.nxv8i16(
@@ -494,8 +499,9 @@ define <vscale x 16 x i32> @intrinsic_vwsubu_vv_nxv16i32_nxv16i16_nxv16i16(<vsca
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv16i32_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwsubu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsubu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.nxv16i32.nxv16i16.nxv16i16(
@@ -590,8 +596,9 @@ define <vscale x 2 x i64> @intrinsic_vwsubu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv2i64_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.nxv2i64.nxv2i32.nxv2i32(
@@ -638,8 +645,9 @@ define <vscale x 4 x i64> @intrinsic_vwsubu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv4i64_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vwsubu.vv v8, v14, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.nxv4i32(
@@ -686,8 +694,9 @@ define <vscale x 8 x i64> @intrinsic_vwsubu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
; CHECK-LABEL: intrinsic_vwsubu_vv_nxv8i64_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwsubu.vv v16, v8, v12
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: vwsubu.vv v8, v20, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.nxv8i64.nxv8i32.nxv8i32(
@@ -878,8 +887,8 @@ define <vscale x 8 x i16> @intrinsic_vwsubu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv8i16_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vwsubu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsubu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.nxv8i16.nxv8i8.i8(
@@ -926,8 +935,8 @@ define <vscale x 16 x i16> @intrinsic_vwsubu_vx_nxv16i16_nxv16i8_i8(<vscale x 16
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv16i16_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwsubu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsubu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.nxv16i16.nxv16i8.i8(
@@ -974,8 +983,8 @@ define <vscale x 32 x i16> @intrinsic_vwsubu_vx_nxv32i16_nxv32i8_i8(<vscale x 32
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv32i16_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwsubu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsubu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.nxv32i16.nxv32i8.i8(
@@ -1118,8 +1127,8 @@ define <vscale x 4 x i32> @intrinsic_vwsubu_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv4i32_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vwsubu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsubu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.nxv4i32.nxv4i16.i16(
@@ -1166,8 +1175,8 @@ define <vscale x 8 x i32> @intrinsic_vwsubu_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv8i32_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vwsubu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsubu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.nxv8i32.nxv8i16.i16(
@@ -1214,8 +1223,8 @@ define <vscale x 16 x i32> @intrinsic_vwsubu_vx_nxv16i32_nxv16i16_i16(<vscale x
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv16i32_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwsubu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsubu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.nxv16i32.nxv16i16.i16(
@@ -1310,8 +1319,8 @@ define <vscale x 2 x i64> @intrinsic_vwsubu_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv2i64_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vwsubu.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vwsubu.vx v8, v10, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.nxv2i64.nxv2i32.i32(
@@ -1358,8 +1367,8 @@ define <vscale x 4 x i64> @intrinsic_vwsubu_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv4i64_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vwsubu.vx v12, v8, a0
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vwsubu.vx v8, v12, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.i32(
@@ -1406,8 +1415,8 @@ define <vscale x 8 x i64> @intrinsic_vwsubu_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
; CHECK-LABEL: intrinsic_vwsubu_vx_nxv8i64_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vwsubu.vx v16, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vwsubu.vx v8, v16, a0
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
index 4676be60f64e1..01b3287e4ed31 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
@@ -1982,8 +1982,8 @@ define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<v
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vwsubu.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwsubu.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
@@ -1999,8 +1999,8 @@ define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vwsubu.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwsubu.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
@@ -2016,8 +2016,8 @@ define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwsubu.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwsubu.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
@@ -2067,8 +2067,8 @@ define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vwsubu.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwsubu.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
@@ -2084,8 +2084,8 @@ define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vwsubu.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwsubu.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
@@ -2118,8 +2118,8 @@ define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vwsubu.wv v12, v10, v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vwsubu.wv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
@@ -2135,8 +2135,8 @@ define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vwsubu.wv v16, v12, v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vwsubu.wv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
@@ -2152,8 +2152,8 @@ define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<
; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vwsubu.wv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vwsubu.wv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
index 934d7eb43ac2a..34337b1af1df5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
@@ -56,8 +56,8 @@ define <vscale x 2 x i64> @vzext_nxv2i8_nxv2i64(<vscale x 2 x i8> %a, <vscale x
; CHECK-LABEL: vzext_nxv2i8_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf8 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i64> %v
@@ -67,8 +67,8 @@ define <vscale x 2 x i64> @vzext_nxv2i8_nxv2i64_unmasked(<vscale x 2 x i8> %a, i
; CHECK-LABEL: vzext_nxv2i8_nxv2i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf8 v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x i64> %v
@@ -104,8 +104,8 @@ define <vscale x 2 x i64> @vzext_nxv2i16_nxv2i64(<vscale x 2 x i16> %a, <vscale
; CHECK-LABEL: vzext_nxv2i16_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i64> %v
@@ -115,8 +115,8 @@ define <vscale x 2 x i64> @vzext_nxv2i16_nxv2i64_unmasked(<vscale x 2 x i16> %a,
; CHECK-LABEL: vzext_nxv2i16_nxv2i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x i64> %v
@@ -128,8 +128,8 @@ define <vscale x 2 x i64> @vzext_nxv2i32_nxv2i64(<vscale x 2 x i32> %a, <vscale
; CHECK-LABEL: vzext_nxv2i32_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i64> %v
@@ -139,8 +139,8 @@ define <vscale x 2 x i64> @vzext_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i32> %a,
; CHECK-LABEL: vzext_nxv2i32_nxv2i64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
ret <vscale x 2 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext.ll b/llvm/test/CodeGen/RISCV/rvv/vzext.ll
index bf246b2694436..a883041000a9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vzext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext.ll
@@ -57,8 +57,8 @@ define <vscale x 2 x i64> @intrinsic_vzext_vf8_nxv2i64(<vscale x 2 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vzext_vf8_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf8 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i8(
@@ -101,8 +101,8 @@ define <vscale x 4 x i64> @intrinsic_vzext_vf8_nxv4i64(<vscale x 4 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vzext_vf8_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf8 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf8 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i8(
@@ -145,8 +145,8 @@ define <vscale x 8 x i64> @intrinsic_vzext_vf8_nxv8i64(<vscale x 8 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vzext_vf8_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf8 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv1r.v v16, v8
+; CHECK-NEXT: vzext.vf8 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i8(
@@ -233,8 +233,8 @@ define <vscale x 2 x i64> @intrinsic_vzext_vf4_nxv2i64(<vscale x 2 x i16> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf4_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i16(
@@ -277,8 +277,8 @@ define <vscale x 4 x i64> @intrinsic_vzext_vf4_nxv4i64(<vscale x 4 x i16> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf4_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf4 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i16(
@@ -321,8 +321,8 @@ define <vscale x 8 x i64> @intrinsic_vzext_vf4_nxv8i64(<vscale x 8 x i16> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vzext.vf4 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i16(
@@ -453,8 +453,8 @@ define <vscale x 4 x i32> @intrinsic_vzext_vf4_nxv4i32(<vscale x 4 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vzext_vf4_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(
@@ -497,8 +497,8 @@ define <vscale x 8 x i32> @intrinsic_vzext_vf4_nxv8i32(<vscale x 8 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vzext.vf4 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i8(
@@ -541,8 +541,8 @@ define <vscale x 16 x i32> @intrinsic_vzext_vf4_nxv16i32(<vscale x 16 x i8> %0,
; CHECK-LABEL: intrinsic_vzext_vf4_nxv16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: vzext.vf4 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i8(
@@ -629,8 +629,8 @@ define <vscale x 2 x i64> @intrinsic_vzext_vf2_nxv2i64(<vscale x 2 x i32> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i32(
@@ -673,8 +673,8 @@ define <vscale x 4 x i64> @intrinsic_vzext_vf2_nxv4i64(<vscale x 4 x i32> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i32(
@@ -717,8 +717,8 @@ define <vscale x 8 x i64> @intrinsic_vzext_vf2_nxv8i64(<vscale x 8 x i32> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i32(
@@ -849,8 +849,8 @@ define <vscale x 4 x i32> @intrinsic_vzext_vf2_nxv4i32(<vscale x 4 x i16> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(
@@ -893,8 +893,8 @@ define <vscale x 8 x i32> @intrinsic_vzext_vf2_nxv8i32(<vscale x 8 x i16> %0, iX
; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i16(
@@ -937,8 +937,8 @@ define <vscale x 16 x i32> @intrinsic_vzext_vf2_nxv16i32(<vscale x 16 x i16> %0,
; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i16(
@@ -1113,8 +1113,8 @@ define <vscale x 8 x i16> @intrinsic_vzext_vf2_nxv8i16(<vscale x 8 x i8> %0, iXL
; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vzext.nxv8i16.nxv8i8(
@@ -1157,8 +1157,8 @@ define <vscale x 16 x i16> @intrinsic_vzext_vf2_nxv16i16(<vscale x 16 x i8> %0,
; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vzext.nxv16i16.nxv16i8(
@@ -1201,8 +1201,8 @@ define <vscale x 32 x i16> @intrinsic_vzext_vf2_nxv32i16(<vscale x 32 x i8> %0,
; CHECK-LABEL: intrinsic_vzext_vf2_nxv32i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vzext.nxv32i16.nxv32i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll
index 47622a62d0eaf..107d59670c4c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll
@@ -33,10 +33,10 @@ define <vscale x 1 x i8> @vror_vx_nxv1i8(<vscale x 1 x i8> %a, i8 %b) {
define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b) {
; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsll.vx v10, v8, a0
-; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: addiw a0, a0, 1
+; CHECK-NEXT: vwsll.vx v8, v10, a0
; CHECK-NEXT: ret
%s = add i8 %b, 1
%head = insertelement <vscale x 2 x i8> poison, i8 %s, i32 0
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 42c87c9660dc9..daeb306b7e85f 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -667,17 +667,17 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, ta, ma
; RV32MV-NEXT: vmv.v.i v10, 1
; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v11, 0
+; RV32MV-NEXT: vmv.v.i v12, 0
; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
-; RV32MV-NEXT: vslideup.vi v11, v10, 2
+; RV32MV-NEXT: vslideup.vi v12, v10, 2
; RV32MV-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
; RV32MV-NEXT: vmv.v.i v10, 2
; RV32MV-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
-; RV32MV-NEXT: vslideup.vi v11, v10, 4
+; RV32MV-NEXT: vslideup.vi v12, v10, 4
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vsext.vf4 v12, v11
+; RV32MV-NEXT: vsext.vf4 v10, v12
; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT: vmsne.vv v0, v8, v12
+; RV32MV-NEXT: vmsne.vv v0, v8, v10
; RV32MV-NEXT: vmv.v.i v8, 0
; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -735,7 +735,7 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64MV-NEXT: ld a6, %lo(.LCPI3_2)(a6)
; RV64MV-NEXT: addi a7, a7, 256
; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT: vmv.s.x v8, a7
+; RV64MV-NEXT: vmv.s.x v10, a7
; RV64MV-NEXT: slli a3, a3, 32
; RV64MV-NEXT: srli a7, a1, 2
; RV64MV-NEXT: or a3, a2, a3
@@ -766,16 +766,16 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64MV-NEXT: add a1, a1, a5
; RV64MV-NEXT: li a4, -1
; RV64MV-NEXT: srli a4, a4, 31
-; RV64MV-NEXT: vsext.vf8 v10, v8
+; RV64MV-NEXT: vsext.vf8 v8, v10
; RV64MV-NEXT: add a3, a3, a6
; RV64MV-NEXT: slli a6, a6, 2
-; RV64MV-NEXT: vmv.v.x v8, a1
+; RV64MV-NEXT: vmv.v.x v10, a1
; RV64MV-NEXT: add a3, a3, a6
-; RV64MV-NEXT: vslide1down.vx v8, v8, a2
-; RV64MV-NEXT: vslide1down.vx v8, v8, a3
-; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: vand.vx v8, v8, a4
-; RV64MV-NEXT: vmsne.vv v0, v8, v10
+; RV64MV-NEXT: vslide1down.vx v10, v10, a2
+; RV64MV-NEXT: vslide1down.vx v10, v10, a3
+; RV64MV-NEXT: vslidedown.vi v10, v10, 1
+; RV64MV-NEXT: vand.vx v10, v10, a4
+; RV64MV-NEXT: vmsne.vv v0, v10, v8
; RV64MV-NEXT: vmv.v.i v8, 0
; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV64MV-NEXT: vslidedown.vi v10, v8, 2
>From 6cf0b43d9aec4741f1f1a2024a76090af2eb8d71 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 13 Mar 2025 16:51:15 -0700
Subject: [PATCH 2/2] Address review comment
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 9 +-
.../rvv/fixed-vectors-interleaved-access.ll | 16 +-
.../CodeGen/RISCV/rvv/vector-deinterleave.ll | 28 +-
.../CodeGen/RISCV/rvv/vector-interleave.ll | 68 +--
llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll | 416 +++++++++---------
llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll | 416 +++++++++---------
llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll | 416 +++++++++---------
llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll | 416 +++++++++---------
9 files changed, 897 insertions(+), 900 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 1e0541e667895..3e4d71db995a0 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -718,6 +718,9 @@ class VReg<list<ValueType> regTypes, dag regList, int Vlmul>
regList> {
let IsVRegClass = 1;
let VLMul = Vlmul;
+
+ // Prefer to allocate high LMUL registers first.
+ let AllocationPriority = !if(!gt(Vlmul, 1), Vlmul, 0);
}
defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
@@ -752,24 +755,18 @@ def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
def VRNoV0 : VReg<!listconcat(VM1VTs, VMaskVTs), (sub VR, V0), 1>;
-let AllocationPriority = 2 in
def VRM2 : VReg<VM2VTs, (add (sequence "V%uM2", 8, 31, 2),
(sequence "V%uM2", 6, 0, 2)), 2>;
-let AllocationPriority = 2 in
def VRM2NoV0 : VReg<VM2VTs, (sub VRM2, V0M2), 2>;
-let AllocationPriority = 4 in
def VRM4 : VReg<VM4VTs, (add V8M4, V12M4, V16M4, V20M4,
V24M4, V28M4, V4M4, V0M4), 4>;
-let AllocationPriority = 4 in
def VRM4NoV0 : VReg<VM4VTs, (sub VRM4, V0M4), 4>;
-let AllocationPriority = 8 in
def VRM8 : VReg<VM8VTs, (add V8M8, V16M8, V24M8, V0M8), 8>;
-let AllocationPriority = 8 in
def VRM8NoV0 : VReg<VM8VTs, (sub VRM8, V0M8), 8>;
def VMV0 : VReg<VMaskVTs, (add V0), 1>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 954a5889f31ba..61bafd3cfeeef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1311,17 +1311,17 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback_full:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v12, 4
-; CHECK-NEXT: vmv1r.v v17, v12
-; CHECK-NEXT: vmv1r.v v18, v8
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: vmv1r.v v13, v8
+; CHECK-NEXT: vmv1r.v v14, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v12, 8
-; CHECK-NEXT: vmv1r.v v19, v8
-; CHECK-NEXT: vslidedown.vi v20, v12, 12
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
+; CHECK-NEXT: vmv1r.v v15, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vsseg4e32.v v17, (a0)
+; CHECK-NEXT: vsseg4e32.v v13, (a0)
; CHECK-NEXT: ret
%interleaved.vec = load <16 x i32>, ptr %ptr
%v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 319dfb037c15a..c3a51986a7f8c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -609,21 +609,21 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: vmv1r.v v12, v13
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v12, (a0)
-; CHECK-NEXT: vlseg5e8.v v18, (a1)
-; CHECK-NEXT: vmv2r.v v8, v12
-; CHECK-NEXT: vmv2r.v v10, v14
-; CHECK-NEXT: vmv1r.v v9, v18
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
+; CHECK-NEXT: vlseg5e8.v v14, (a1)
+; CHECK-NEXT: vmv2r.v v20, v8
+; CHECK-NEXT: vmv2r.v v22, v10
+; CHECK-NEXT: vmv1r.v v21, v14
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: vmv1r.v v18, v13
-; CHECK-NEXT: vmsne.vi v8, v18, 0
-; CHECK-NEXT: vmv1r.v v11, v20
-; CHECK-NEXT: vmsne.vi v9, v10, 0
-; CHECK-NEXT: vmv1r.v v20, v15
-; CHECK-NEXT: vmsne.vi v10, v20, 0
-; CHECK-NEXT: vmv1r.v v17, v22
-; CHECK-NEXT: vmsne.vi v11, v16, 0
+; CHECK-NEXT: vmsne.vi v0, v20, 0
+; CHECK-NEXT: vmv1r.v v14, v9
+; CHECK-NEXT: vmsne.vi v8, v14, 0
+; CHECK-NEXT: vmv1r.v v23, v16
+; CHECK-NEXT: vmsne.vi v9, v22, 0
+; CHECK-NEXT: vmv1r.v v16, v11
+; CHECK-NEXT: vmsne.vi v10, v16, 0
+; CHECK-NEXT: vmv1r.v v13, v18
+; CHECK-NEXT: vmsne.vi v11, v12, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index a41ed52343099..b0eac8bdf48dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -2080,37 +2080,37 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vmerge.vim v18, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v14, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v22, v14, 1, v0
; CHECK-NEXT: add a3, a4, a2
; CHECK-NEXT: srli a1, a2, 2
; CHECK-NEXT: add a5, a0, a2
-; CHECK-NEXT: vmv2r.v v2, v18
+; CHECK-NEXT: vmv4r.v v24, v16
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v20, v14, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v14, 1, v0
; CHECK-NEXT: add a6, a3, a2
-; CHECK-NEXT: vmv1r.v v3, v16
+; CHECK-NEXT: vmv1r.v v25, v22
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vim v8, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v4, v20
+; CHECK-NEXT: vmv1r.v v26, v18
; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vmerge.vim v22, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v5, v8
+; CHECK-NEXT: vmerge.vim v20, v14, 1, v0
+; CHECK-NEXT: vmv1r.v v27, v8
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vim v10, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v6, v22
-; CHECK-NEXT: vmv1r.v v20, v17
+; CHECK-NEXT: vmv1r.v v28, v20
+; CHECK-NEXT: vmv1r.v v18, v23
; CHECK-NEXT: add a7, a6, a2
-; CHECK-NEXT: vmv1r.v v7, v10
-; CHECK-NEXT: vmv1r.v v22, v9
+; CHECK-NEXT: vmv1r.v v29, v10
+; CHECK-NEXT: vmv1r.v v20, v9
; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vmerge.vim v8, v14, 1, v0
-; CHECK-NEXT: vmv1r.v v24, v11
+; CHECK-NEXT: vmerge.vim v30, v14, 1, v0
+; CHECK-NEXT: vmv1r.v v22, v11
; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vsseg7e8.v v2, (a4)
-; CHECK-NEXT: vmv1r.v v25, v9
-; CHECK-NEXT: vsseg7e8.v v19, (a0)
+; CHECK-NEXT: vsseg7e8.v v24, (a4)
+; CHECK-NEXT: vmv1r.v v23, v31
+; CHECK-NEXT: vsseg7e8.v v17, (a0)
; CHECK-NEXT: vl1r.v v8, (a6)
; CHECK-NEXT: add a6, a7, a2
; CHECK-NEXT: vl1r.v v10, (a4)
@@ -2174,37 +2174,37 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: add a0, sp, a0
; ZVBB-NEXT: addi a0, a0, 16
; ZVBB-NEXT: csrr a2, vlenb
-; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmerge.vim v16, v14, 1, v0
+; ZVBB-NEXT: vmv1r.v v0, v8
+; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0
; ZVBB-NEXT: add a3, a4, a2
; ZVBB-NEXT: srli a1, a2, 2
; ZVBB-NEXT: add a5, a0, a2
-; ZVBB-NEXT: vmv2r.v v2, v18
+; ZVBB-NEXT: vmv4r.v v24, v16
; ZVBB-NEXT: vmv1r.v v0, v9
-; ZVBB-NEXT: vmerge.vim v20, v14, 1, v0
+; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0
; ZVBB-NEXT: add a6, a3, a2
-; ZVBB-NEXT: vmv1r.v v3, v16
+; ZVBB-NEXT: vmv1r.v v25, v22
; ZVBB-NEXT: vmv1r.v v0, v10
; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v4, v20
+; ZVBB-NEXT: vmv1r.v v26, v18
; ZVBB-NEXT: vmv1r.v v0, v11
-; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v5, v8
+; ZVBB-NEXT: vmerge.vim v20, v14, 1, v0
+; ZVBB-NEXT: vmv1r.v v27, v8
; ZVBB-NEXT: vmv1r.v v0, v12
; ZVBB-NEXT: vmerge.vim v10, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v6, v22
-; ZVBB-NEXT: vmv1r.v v20, v17
+; ZVBB-NEXT: vmv1r.v v28, v20
+; ZVBB-NEXT: vmv1r.v v18, v23
; ZVBB-NEXT: add a7, a6, a2
-; ZVBB-NEXT: vmv1r.v v7, v10
-; ZVBB-NEXT: vmv1r.v v22, v9
+; ZVBB-NEXT: vmv1r.v v29, v10
+; ZVBB-NEXT: vmv1r.v v20, v9
; ZVBB-NEXT: vmv1r.v v0, v13
-; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0
-; ZVBB-NEXT: vmv1r.v v24, v11
+; ZVBB-NEXT: vmerge.vim v30, v14, 1, v0
+; ZVBB-NEXT: vmv1r.v v22, v11
; ZVBB-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; ZVBB-NEXT: vsseg7e8.v v2, (a4)
-; ZVBB-NEXT: vmv1r.v v25, v9
-; ZVBB-NEXT: vsseg7e8.v v19, (a0)
+; ZVBB-NEXT: vsseg7e8.v v24, (a4)
+; ZVBB-NEXT: vmv1r.v v23, v31
+; ZVBB-NEXT: vsseg7e8.v v17, (a0)
; ZVBB-NEXT: vl1r.v v8, (a6)
; ZVBB-NEXT: add a6, a7, a2
; ZVBB-NEXT: vl1r.v v10, (a4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll
index 901f3cd63fa9e..7f38ad5211d2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll
@@ -1162,12 +1162,12 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a2, a2, a3
@@ -1175,7 +1175,7 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a0, a0, a2
@@ -1191,16 +1191,16 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
index 4f7286aeeda1e..66dd191a82ce0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
@@ -3025,8 +3025,8 @@ define <vscale x 8 x i16> @test_vloxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -3038,8 +3038,8 @@ define <vscale x 8 x i16> @test_vloxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -3112,8 +3112,8 @@ define <vscale x 16 x i16> @test_vloxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vloxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i32 %vl, i32 4)
@@ -3125,8 +3125,8 @@ define <vscale x 16 x i16> @test_vloxseg2_mask_nxv16i16_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -3460,8 +3460,8 @@ define <vscale x 8 x i16> @test_vloxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -3473,8 +3473,8 @@ define <vscale x 8 x i16> @test_vloxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -3808,8 +3808,8 @@ define <vscale x 8 x i16> @test_vloxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -3821,8 +3821,8 @@ define <vscale x 8 x i16> @test_vloxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -5113,8 +5113,8 @@ define <vscale x 4 x i32> @test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -5126,8 +5126,8 @@ define <vscale x 4 x i32> @test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5142,8 +5142,8 @@ define <vscale x 4 x i32> @test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -5155,8 +5155,8 @@ define <vscale x 4 x i32> @test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5200,8 +5200,8 @@ define <vscale x 8 x i32> @test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 5)
@@ -5213,8 +5213,8 @@ define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5229,8 +5229,8 @@ define <vscale x 8 x i32> @test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i32 %vl, i32 5)
@@ -5242,8 +5242,8 @@ define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5461,8 +5461,8 @@ define <vscale x 4 x i32> @test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -5474,8 +5474,8 @@ define <vscale x 4 x i32> @test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5490,8 +5490,8 @@ define <vscale x 4 x i32> @test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -5503,8 +5503,8 @@ define <vscale x 4 x i32> @test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5722,8 +5722,8 @@ define <vscale x 4 x i32> @test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -5735,8 +5735,8 @@ define <vscale x 4 x i32> @test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5751,8 +5751,8 @@ define <vscale x 4 x i32> @test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -5764,8 +5764,8 @@ define <vscale x 4 x i32> @test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -6592,8 +6592,8 @@ define <vscale x 2 x i64> @test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -6605,8 +6605,8 @@ define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6621,8 +6621,8 @@ define <vscale x 2 x i64> @test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -6634,8 +6634,8 @@ define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6650,8 +6650,8 @@ define <vscale x 2 x i64> @test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -6663,8 +6663,8 @@ define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6679,8 +6679,8 @@ define <vscale x 4 x i64> @test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 6)
@@ -6692,8 +6692,8 @@ define <vscale x 4 x i64> @test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6708,8 +6708,8 @@ define <vscale x 4 x i64> @test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 6)
@@ -6721,8 +6721,8 @@ define <vscale x 4 x i64> @test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6737,8 +6737,8 @@ define <vscale x 4 x i64> @test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i32 %vl, i32 6)
@@ -6750,8 +6750,8 @@ define <vscale x 4 x i64> @test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6853,8 +6853,8 @@ define <vscale x 2 x i64> @test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -6866,8 +6866,8 @@ define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6882,8 +6882,8 @@ define <vscale x 2 x i64> @test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -6895,8 +6895,8 @@ define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6911,8 +6911,8 @@ define <vscale x 2 x i64> @test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -6924,8 +6924,8 @@ define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7027,8 +7027,8 @@ define <vscale x 2 x i64> @test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -7040,8 +7040,8 @@ define <vscale x 2 x i64> @test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7056,8 +7056,8 @@ define <vscale x 2 x i64> @test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -7069,8 +7069,8 @@ define <vscale x 2 x i64> @test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7085,8 +7085,8 @@ define <vscale x 2 x i64> @test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -7098,8 +7098,8 @@ define <vscale x 2 x i64> @test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7703,8 +7703,8 @@ define <vscale x 8 x half> @test_vloxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t
; CHECK-LABEL: test_vloxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -7716,8 +7716,8 @@ define <vscale x 8 x half> @test_vloxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -7784,8 +7784,8 @@ define <vscale x 16 x half> @test_vloxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i32 %vl, i32 4)
@@ -7797,8 +7797,8 @@ define <vscale x 16 x half> @test_vloxseg2_mask_nxv16f16_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -8108,8 +8108,8 @@ define <vscale x 8 x half> @test_vloxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t
; CHECK-LABEL: test_vloxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -8121,8 +8121,8 @@ define <vscale x 8 x half> @test_vloxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -8432,8 +8432,8 @@ define <vscale x 8 x half> @test_vloxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t
; CHECK-LABEL: test_vloxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -8445,8 +8445,8 @@ define <vscale x 8 x half> @test_vloxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vloxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -9647,8 +9647,8 @@ define <vscale x 4 x float> @test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -9660,8 +9660,8 @@ define <vscale x 4 x float> @test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9674,8 +9674,8 @@ define <vscale x 4 x float> @test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -9687,8 +9687,8 @@ define <vscale x 4 x float> @test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9728,8 +9728,8 @@ define <vscale x 8 x float> @test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 5)
@@ -9741,8 +9741,8 @@ define <vscale x 8 x float> @test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9755,8 +9755,8 @@ define <vscale x 8 x float> @test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i32 %vl, i32 5)
@@ -9768,8 +9768,8 @@ define <vscale x 8 x float> @test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9971,8 +9971,8 @@ define <vscale x 4 x float> @test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -9984,8 +9984,8 @@ define <vscale x 4 x float> @test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9998,8 +9998,8 @@ define <vscale x 4 x float> @test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -10011,8 +10011,8 @@ define <vscale x 4 x float> @test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -10214,8 +10214,8 @@ define <vscale x 4 x float> @test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -10227,8 +10227,8 @@ define <vscale x 4 x float> @test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -10241,8 +10241,8 @@ define <vscale x 4 x float> @test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -10254,8 +10254,8 @@ define <vscale x 4 x float> @test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -11024,8 +11024,8 @@ define <vscale x 2 x double> @test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -11037,8 +11037,8 @@ define <vscale x 2 x double> @test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11051,8 +11051,8 @@ define <vscale x 2 x double> @test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -11064,8 +11064,8 @@ define <vscale x 2 x double> @test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11078,8 +11078,8 @@ define <vscale x 2 x double> @test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -11091,8 +11091,8 @@ define <vscale x 2 x double> @test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11105,8 +11105,8 @@ define <vscale x 4 x double> @test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 6)
@@ -11118,8 +11118,8 @@ define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11132,8 +11132,8 @@ define <vscale x 4 x double> @test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 6)
@@ -11145,8 +11145,8 @@ define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11159,8 +11159,8 @@ define <vscale x 4 x double> @test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i32 %vl, i32 6)
@@ -11172,8 +11172,8 @@ define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11267,8 +11267,8 @@ define <vscale x 2 x double> @test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -11280,8 +11280,8 @@ define <vscale x 2 x double> @test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11294,8 +11294,8 @@ define <vscale x 2 x double> @test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -11307,8 +11307,8 @@ define <vscale x 2 x double> @test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11321,8 +11321,8 @@ define <vscale x 2 x double> @test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -11334,8 +11334,8 @@ define <vscale x 2 x double> @test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11429,8 +11429,8 @@ define <vscale x 2 x double> @test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -11442,8 +11442,8 @@ define <vscale x 2 x double> @test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11456,8 +11456,8 @@ define <vscale x 2 x double> @test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -11469,8 +11469,8 @@ define <vscale x 2 x double> @test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11483,8 +11483,8 @@ define <vscale x 2 x double> @test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -11496,8 +11496,8 @@ define <vscale x 2 x double> @test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -12077,8 +12077,8 @@ define <vscale x 8 x bfloat> @test_vloxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vloxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -12090,8 +12090,8 @@ define <vscale x 8 x bfloat> @test_vloxseg2_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vloxseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -12158,8 +12158,8 @@ define <vscale x 16 x bfloat> @test_vloxseg2_nxv16bf16_triscv.vector.tuple_nxv32
; CHECK-LABEL: test_vloxseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i32 %vl, i32 4)
@@ -12171,8 +12171,8 @@ define <vscale x 16 x bfloat> @test_vloxseg2_mask_nxv16bf16_triscv.vector.tuple_
; CHECK-LABEL: test_vloxseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -12482,8 +12482,8 @@ define <vscale x 8 x bfloat> @test_vloxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vloxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -12495,8 +12495,8 @@ define <vscale x 8 x bfloat> @test_vloxseg3_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vloxseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -12806,8 +12806,8 @@ define <vscale x 8 x bfloat> @test_vloxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vloxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -12819,8 +12819,8 @@ define <vscale x 8 x bfloat> @test_vloxseg4_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vloxseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
index 7b1d545ff9e9e..5b44c1f7e142b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
@@ -3924,8 +3924,8 @@ define <vscale x 8 x i16> @test_vloxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -3937,8 +3937,8 @@ define <vscale x 8 x i16> @test_vloxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -4040,8 +4040,8 @@ define <vscale x 16 x i16> @test_vloxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vloxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i64 %vl, i64 4)
@@ -4053,8 +4053,8 @@ define <vscale x 16 x i16> @test_vloxseg2_mask_nxv16i16_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -4475,8 +4475,8 @@ define <vscale x 8 x i16> @test_vloxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -4488,8 +4488,8 @@ define <vscale x 8 x i16> @test_vloxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -4939,8 +4939,8 @@ define <vscale x 8 x i16> @test_vloxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -4952,8 +4952,8 @@ define <vscale x 8 x i16> @test_vloxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -6679,8 +6679,8 @@ define <vscale x 4 x i32> @test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -6692,8 +6692,8 @@ define <vscale x 4 x i32> @test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -6708,8 +6708,8 @@ define <vscale x 4 x i32> @test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -6721,8 +6721,8 @@ define <vscale x 4 x i32> @test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -6795,8 +6795,8 @@ define <vscale x 8 x i32> @test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 5)
@@ -6808,8 +6808,8 @@ define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -6824,8 +6824,8 @@ define <vscale x 8 x i32> @test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i64 %vl, i64 5)
@@ -6837,8 +6837,8 @@ define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7143,8 +7143,8 @@ define <vscale x 4 x i32> @test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -7156,8 +7156,8 @@ define <vscale x 4 x i32> @test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7172,8 +7172,8 @@ define <vscale x 4 x i32> @test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -7185,8 +7185,8 @@ define <vscale x 4 x i32> @test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7491,8 +7491,8 @@ define <vscale x 4 x i32> @test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -7504,8 +7504,8 @@ define <vscale x 4 x i32> @test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7520,8 +7520,8 @@ define <vscale x 4 x i32> @test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -7533,8 +7533,8 @@ define <vscale x 4 x i32> @test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -8651,8 +8651,8 @@ define <vscale x 2 x i64> @test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -8664,8 +8664,8 @@ define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8680,8 +8680,8 @@ define <vscale x 2 x i64> @test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -8693,8 +8693,8 @@ define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8709,8 +8709,8 @@ define <vscale x 2 x i64> @test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -8722,8 +8722,8 @@ define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8767,8 +8767,8 @@ define <vscale x 4 x i64> @test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 6)
@@ -8780,8 +8780,8 @@ define <vscale x 4 x i64> @test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8796,8 +8796,8 @@ define <vscale x 4 x i64> @test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 6)
@@ -8809,8 +8809,8 @@ define <vscale x 4 x i64> @test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8825,8 +8825,8 @@ define <vscale x 4 x i64> @test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vloxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i64 %vl, i64 6)
@@ -8838,8 +8838,8 @@ define <vscale x 4 x i64> @test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8999,8 +8999,8 @@ define <vscale x 2 x i64> @test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -9012,8 +9012,8 @@ define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9028,8 +9028,8 @@ define <vscale x 2 x i64> @test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -9041,8 +9041,8 @@ define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9057,8 +9057,8 @@ define <vscale x 2 x i64> @test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vloxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -9070,8 +9070,8 @@ define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9231,8 +9231,8 @@ define <vscale x 2 x i64> @test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -9244,8 +9244,8 @@ define <vscale x 2 x i64> @test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9260,8 +9260,8 @@ define <vscale x 2 x i64> @test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -9273,8 +9273,8 @@ define <vscale x 2 x i64> @test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9289,8 +9289,8 @@ define <vscale x 2 x i64> @test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vloxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -9302,8 +9302,8 @@ define <vscale x 2 x i64> @test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vloxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -10133,8 +10133,8 @@ define <vscale x 8 x half> @test_vloxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t
; CHECK-LABEL: test_vloxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -10146,8 +10146,8 @@ define <vscale x 8 x half> @test_vloxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -10241,8 +10241,8 @@ define <vscale x 16 x half> @test_vloxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i64 %vl, i64 4)
@@ -10254,8 +10254,8 @@ define <vscale x 16 x half> @test_vloxseg2_mask_nxv16f16_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -10646,8 +10646,8 @@ define <vscale x 8 x half> @test_vloxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t
; CHECK-LABEL: test_vloxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -10659,8 +10659,8 @@ define <vscale x 8 x half> @test_vloxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -11078,8 +11078,8 @@ define <vscale x 8 x half> @test_vloxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t
; CHECK-LABEL: test_vloxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -11091,8 +11091,8 @@ define <vscale x 8 x half> @test_vloxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vloxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -12698,8 +12698,8 @@ define <vscale x 4 x float> @test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -12711,8 +12711,8 @@ define <vscale x 4 x float> @test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -12725,8 +12725,8 @@ define <vscale x 4 x float> @test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vloxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -12738,8 +12738,8 @@ define <vscale x 4 x float> @test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -12806,8 +12806,8 @@ define <vscale x 8 x float> @test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 5)
@@ -12819,8 +12819,8 @@ define <vscale x 8 x float> @test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -12833,8 +12833,8 @@ define <vscale x 8 x float> @test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vloxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i64 %vl, i64 5)
@@ -12846,8 +12846,8 @@ define <vscale x 8 x float> @test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13130,8 +13130,8 @@ define <vscale x 4 x float> @test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -13143,8 +13143,8 @@ define <vscale x 4 x float> @test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13157,8 +13157,8 @@ define <vscale x 4 x float> @test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vloxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -13170,8 +13170,8 @@ define <vscale x 4 x float> @test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13454,8 +13454,8 @@ define <vscale x 4 x float> @test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -13467,8 +13467,8 @@ define <vscale x 4 x float> @test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13481,8 +13481,8 @@ define <vscale x 4 x float> @test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vloxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -13494,8 +13494,8 @@ define <vscale x 4 x float> @test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vloxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -14534,8 +14534,8 @@ define <vscale x 2 x double> @test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -14547,8 +14547,8 @@ define <vscale x 2 x double> @test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14561,8 +14561,8 @@ define <vscale x 2 x double> @test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -14574,8 +14574,8 @@ define <vscale x 2 x double> @test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14588,8 +14588,8 @@ define <vscale x 2 x double> @test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -14601,8 +14601,8 @@ define <vscale x 2 x double> @test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14642,8 +14642,8 @@ define <vscale x 4 x double> @test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 6)
@@ -14655,8 +14655,8 @@ define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14669,8 +14669,8 @@ define <vscale x 4 x double> @test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 6)
@@ -14682,8 +14682,8 @@ define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14696,8 +14696,8 @@ define <vscale x 4 x double> @test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vloxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i64 %vl, i64 6)
@@ -14709,8 +14709,8 @@ define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14858,8 +14858,8 @@ define <vscale x 2 x double> @test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -14871,8 +14871,8 @@ define <vscale x 2 x double> @test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14885,8 +14885,8 @@ define <vscale x 2 x double> @test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -14898,8 +14898,8 @@ define <vscale x 2 x double> @test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14912,8 +14912,8 @@ define <vscale x 2 x double> @test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -14925,8 +14925,8 @@ define <vscale x 2 x double> @test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15074,8 +15074,8 @@ define <vscale x 2 x double> @test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -15087,8 +15087,8 @@ define <vscale x 2 x double> @test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15101,8 +15101,8 @@ define <vscale x 2 x double> @test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -15114,8 +15114,8 @@ define <vscale x 2 x double> @test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15128,8 +15128,8 @@ define <vscale x 2 x double> @test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vloxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -15141,8 +15141,8 @@ define <vscale x 2 x double> @test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vloxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15938,8 +15938,8 @@ define <vscale x 8 x bfloat> @test_vloxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vloxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -15951,8 +15951,8 @@ define <vscale x 8 x bfloat> @test_vloxseg2_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vloxseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -16046,8 +16046,8 @@ define <vscale x 16 x bfloat> @test_vloxseg2_nxv16bf16_triscv.vector.tuple_nxv32
; CHECK-LABEL: test_vloxseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i64 %vl, i64 4)
@@ -16059,8 +16059,8 @@ define <vscale x 16 x bfloat> @test_vloxseg2_mask_nxv16bf16_triscv.vector.tuple_
; CHECK-LABEL: test_vloxseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -16451,8 +16451,8 @@ define <vscale x 8 x bfloat> @test_vloxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vloxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -16464,8 +16464,8 @@ define <vscale x 8 x bfloat> @test_vloxseg3_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vloxseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -16883,8 +16883,8 @@ define <vscale x 8 x bfloat> @test_vloxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vloxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -16896,8 +16896,8 @@ define <vscale x 8 x bfloat> @test_vloxseg4_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vloxseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vloxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vloxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll
index d1ca40bcc0db3..e648068fb721b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll
@@ -3025,8 +3025,8 @@ define <vscale x 8 x i16> @test_vluxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -3038,8 +3038,8 @@ define <vscale x 8 x i16> @test_vluxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -3112,8 +3112,8 @@ define <vscale x 16 x i16> @test_vluxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vluxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i32 %vl, i32 4)
@@ -3125,8 +3125,8 @@ define <vscale x 16 x i16> @test_vluxseg2_mask_nxv16i16_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -3460,8 +3460,8 @@ define <vscale x 8 x i16> @test_vluxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -3473,8 +3473,8 @@ define <vscale x 8 x i16> @test_vluxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -3808,8 +3808,8 @@ define <vscale x 8 x i16> @test_vluxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -3821,8 +3821,8 @@ define <vscale x 8 x i16> @test_vluxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -5113,8 +5113,8 @@ define <vscale x 4 x i32> @test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -5126,8 +5126,8 @@ define <vscale x 4 x i32> @test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5142,8 +5142,8 @@ define <vscale x 4 x i32> @test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -5155,8 +5155,8 @@ define <vscale x 4 x i32> @test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5200,8 +5200,8 @@ define <vscale x 8 x i32> @test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 5)
@@ -5213,8 +5213,8 @@ define <vscale x 8 x i32> @test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5229,8 +5229,8 @@ define <vscale x 8 x i32> @test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i32 %vl, i32 5)
@@ -5242,8 +5242,8 @@ define <vscale x 8 x i32> @test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5461,8 +5461,8 @@ define <vscale x 4 x i32> @test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -5474,8 +5474,8 @@ define <vscale x 4 x i32> @test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5490,8 +5490,8 @@ define <vscale x 4 x i32> @test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -5503,8 +5503,8 @@ define <vscale x 4 x i32> @test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5722,8 +5722,8 @@ define <vscale x 4 x i32> @test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -5735,8 +5735,8 @@ define <vscale x 4 x i32> @test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -5751,8 +5751,8 @@ define <vscale x 4 x i32> @test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -5764,8 +5764,8 @@ define <vscale x 4 x i32> @test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -6592,8 +6592,8 @@ define <vscale x 2 x i64> @test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -6605,8 +6605,8 @@ define <vscale x 2 x i64> @test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6621,8 +6621,8 @@ define <vscale x 2 x i64> @test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -6634,8 +6634,8 @@ define <vscale x 2 x i64> @test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6650,8 +6650,8 @@ define <vscale x 2 x i64> @test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -6663,8 +6663,8 @@ define <vscale x 2 x i64> @test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6679,8 +6679,8 @@ define <vscale x 4 x i64> @test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 6)
@@ -6692,8 +6692,8 @@ define <vscale x 4 x i64> @test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6708,8 +6708,8 @@ define <vscale x 4 x i64> @test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 6)
@@ -6721,8 +6721,8 @@ define <vscale x 4 x i64> @test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6737,8 +6737,8 @@ define <vscale x 4 x i64> @test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i32 %vl, i32 6)
@@ -6750,8 +6750,8 @@ define <vscale x 4 x i64> @test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6853,8 +6853,8 @@ define <vscale x 2 x i64> @test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -6866,8 +6866,8 @@ define <vscale x 2 x i64> @test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6882,8 +6882,8 @@ define <vscale x 2 x i64> @test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -6895,8 +6895,8 @@ define <vscale x 2 x i64> @test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -6911,8 +6911,8 @@ define <vscale x 2 x i64> @test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -6924,8 +6924,8 @@ define <vscale x 2 x i64> @test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7027,8 +7027,8 @@ define <vscale x 2 x i64> @test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -7040,8 +7040,8 @@ define <vscale x 2 x i64> @test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7056,8 +7056,8 @@ define <vscale x 2 x i64> @test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -7069,8 +7069,8 @@ define <vscale x 2 x i64> @test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7085,8 +7085,8 @@ define <vscale x 2 x i64> @test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -7098,8 +7098,8 @@ define <vscale x 2 x i64> @test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -7703,8 +7703,8 @@ define <vscale x 8 x half> @test_vluxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t
; CHECK-LABEL: test_vluxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -7716,8 +7716,8 @@ define <vscale x 8 x half> @test_vluxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -7784,8 +7784,8 @@ define <vscale x 16 x half> @test_vluxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i32 %vl, i32 4)
@@ -7797,8 +7797,8 @@ define <vscale x 16 x half> @test_vluxseg2_mask_nxv16f16_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -8108,8 +8108,8 @@ define <vscale x 8 x half> @test_vluxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t
; CHECK-LABEL: test_vluxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -8121,8 +8121,8 @@ define <vscale x 8 x half> @test_vluxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -8432,8 +8432,8 @@ define <vscale x 8 x half> @test_vluxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t
; CHECK-LABEL: test_vluxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -8445,8 +8445,8 @@ define <vscale x 8 x half> @test_vluxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vluxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -9647,8 +9647,8 @@ define <vscale x 4 x float> @test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -9660,8 +9660,8 @@ define <vscale x 4 x float> @test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9674,8 +9674,8 @@ define <vscale x 4 x float> @test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -9687,8 +9687,8 @@ define <vscale x 4 x float> @test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9728,8 +9728,8 @@ define <vscale x 8 x float> @test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 5)
@@ -9741,8 +9741,8 @@ define <vscale x 8 x float> @test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9755,8 +9755,8 @@ define <vscale x 8 x float> @test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i32 %vl, i32 5)
@@ -9768,8 +9768,8 @@ define <vscale x 8 x float> @test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9971,8 +9971,8 @@ define <vscale x 4 x float> @test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -9984,8 +9984,8 @@ define <vscale x 4 x float> @test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -9998,8 +9998,8 @@ define <vscale x 4 x float> @test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -10011,8 +10011,8 @@ define <vscale x 4 x float> @test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -10214,8 +10214,8 @@ define <vscale x 4 x float> @test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 5)
@@ -10227,8 +10227,8 @@ define <vscale x 4 x float> @test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -10241,8 +10241,8 @@ define <vscale x 4 x float> @test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 5)
@@ -10254,8 +10254,8 @@ define <vscale x 4 x float> @test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
@@ -11024,8 +11024,8 @@ define <vscale x 2 x double> @test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -11037,8 +11037,8 @@ define <vscale x 2 x double> @test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11051,8 +11051,8 @@ define <vscale x 2 x double> @test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -11064,8 +11064,8 @@ define <vscale x 2 x double> @test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11078,8 +11078,8 @@ define <vscale x 2 x double> @test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -11091,8 +11091,8 @@ define <vscale x 2 x double> @test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11105,8 +11105,8 @@ define <vscale x 4 x double> @test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 6)
@@ -11118,8 +11118,8 @@ define <vscale x 4 x double> @test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11132,8 +11132,8 @@ define <vscale x 4 x double> @test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 6)
@@ -11145,8 +11145,8 @@ define <vscale x 4 x double> @test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11159,8 +11159,8 @@ define <vscale x 4 x double> @test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i32 %vl, i32 6)
@@ -11172,8 +11172,8 @@ define <vscale x 4 x double> @test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11267,8 +11267,8 @@ define <vscale x 2 x double> @test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -11280,8 +11280,8 @@ define <vscale x 2 x double> @test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11294,8 +11294,8 @@ define <vscale x 2 x double> @test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -11307,8 +11307,8 @@ define <vscale x 2 x double> @test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11321,8 +11321,8 @@ define <vscale x 2 x double> @test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -11334,8 +11334,8 @@ define <vscale x 2 x double> @test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11429,8 +11429,8 @@ define <vscale x 2 x double> @test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 6)
@@ -11442,8 +11442,8 @@ define <vscale x 2 x double> @test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11456,8 +11456,8 @@ define <vscale x 2 x double> @test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 6)
@@ -11469,8 +11469,8 @@ define <vscale x 2 x double> @test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -11483,8 +11483,8 @@ define <vscale x 2 x double> @test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 6)
@@ -11496,8 +11496,8 @@ define <vscale x 2 x double> @test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
@@ -12077,8 +12077,8 @@ define <vscale x 8 x bfloat> @test_vluxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vluxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -12090,8 +12090,8 @@ define <vscale x 8 x bfloat> @test_vluxseg2_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vluxseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -12158,8 +12158,8 @@ define <vscale x 16 x bfloat> @test_vluxseg2_nxv16bf16_triscv.vector.tuple_nxv32
; CHECK-LABEL: test_vluxseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i32 %vl, i32 4)
@@ -12171,8 +12171,8 @@ define <vscale x 16 x bfloat> @test_vluxseg2_mask_nxv16bf16_triscv.vector.tuple_
; CHECK-LABEL: test_vluxseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -12482,8 +12482,8 @@ define <vscale x 8 x bfloat> @test_vluxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vluxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -12495,8 +12495,8 @@ define <vscale x 8 x bfloat> @test_vluxseg3_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vluxseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
@@ -12806,8 +12806,8 @@ define <vscale x 8 x bfloat> @test_vluxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vluxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 4)
@@ -12819,8 +12819,8 @@ define <vscale x 8 x bfloat> @test_vluxseg4_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vluxseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
index 0f7348b474ee4..ca07880f80de5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
@@ -4015,8 +4015,8 @@ define <vscale x 8 x i16> @test_vluxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -4028,8 +4028,8 @@ define <vscale x 8 x i16> @test_vluxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -4131,8 +4131,8 @@ define <vscale x 16 x i16> @test_vluxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vluxseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i64 %vl, i64 4)
@@ -4144,8 +4144,8 @@ define <vscale x 16 x i16> @test_vluxseg2_mask_nxv16i16_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -4566,8 +4566,8 @@ define <vscale x 8 x i16> @test_vluxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -4579,8 +4579,8 @@ define <vscale x 8 x i16> @test_vluxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -5030,8 +5030,8 @@ define <vscale x 8 x i16> @test_vluxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -5043,8 +5043,8 @@ define <vscale x 8 x i16> @test_vluxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -6770,8 +6770,8 @@ define <vscale x 4 x i32> @test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -6783,8 +6783,8 @@ define <vscale x 4 x i32> @test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -6799,8 +6799,8 @@ define <vscale x 4 x i32> @test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -6812,8 +6812,8 @@ define <vscale x 4 x i32> @test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -6886,8 +6886,8 @@ define <vscale x 8 x i32> @test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 5)
@@ -6899,8 +6899,8 @@ define <vscale x 8 x i32> @test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -6915,8 +6915,8 @@ define <vscale x 8 x i32> @test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i64 %vl, i64 5)
@@ -6928,8 +6928,8 @@ define <vscale x 8 x i32> @test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7234,8 +7234,8 @@ define <vscale x 4 x i32> @test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -7247,8 +7247,8 @@ define <vscale x 4 x i32> @test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7263,8 +7263,8 @@ define <vscale x 4 x i32> @test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -7276,8 +7276,8 @@ define <vscale x 4 x i32> @test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7582,8 +7582,8 @@ define <vscale x 4 x i32> @test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -7595,8 +7595,8 @@ define <vscale x 4 x i32> @test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -7611,8 +7611,8 @@ define <vscale x 4 x i32> @test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -7624,8 +7624,8 @@ define <vscale x 4 x i32> @test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -8742,8 +8742,8 @@ define <vscale x 2 x i64> @test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -8755,8 +8755,8 @@ define <vscale x 2 x i64> @test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8771,8 +8771,8 @@ define <vscale x 2 x i64> @test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -8784,8 +8784,8 @@ define <vscale x 2 x i64> @test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8800,8 +8800,8 @@ define <vscale x 2 x i64> @test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -8813,8 +8813,8 @@ define <vscale x 2 x i64> @test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8858,8 +8858,8 @@ define <vscale x 4 x i64> @test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 6)
@@ -8871,8 +8871,8 @@ define <vscale x 4 x i64> @test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8887,8 +8887,8 @@ define <vscale x 4 x i64> @test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 6)
@@ -8900,8 +8900,8 @@ define <vscale x 4 x i64> @test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -8916,8 +8916,8 @@ define <vscale x 4 x i64> @test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_
; CHECK-LABEL: test_vluxseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i64 %vl, i64 6)
@@ -8929,8 +8929,8 @@ define <vscale x 4 x i64> @test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i
; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9090,8 +9090,8 @@ define <vscale x 2 x i64> @test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -9103,8 +9103,8 @@ define <vscale x 2 x i64> @test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9119,8 +9119,8 @@ define <vscale x 2 x i64> @test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -9132,8 +9132,8 @@ define <vscale x 2 x i64> @test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9148,8 +9148,8 @@ define <vscale x 2 x i64> @test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_
; CHECK-LABEL: test_vluxseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -9161,8 +9161,8 @@ define <vscale x 2 x i64> @test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9322,8 +9322,8 @@ define <vscale x 2 x i64> @test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -9335,8 +9335,8 @@ define <vscale x 2 x i64> @test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9351,8 +9351,8 @@ define <vscale x 2 x i64> @test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -9364,8 +9364,8 @@ define <vscale x 2 x i64> @test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -9380,8 +9380,8 @@ define <vscale x 2 x i64> @test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_
; CHECK-LABEL: test_vluxseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -9393,8 +9393,8 @@ define <vscale x 2 x i64> @test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i
; CHECK-LABEL: test_vluxseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -10224,8 +10224,8 @@ define <vscale x 8 x half> @test_vluxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t
; CHECK-LABEL: test_vluxseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -10237,8 +10237,8 @@ define <vscale x 8 x half> @test_vluxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -10332,8 +10332,8 @@ define <vscale x 16 x half> @test_vluxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i64 %vl, i64 4)
@@ -10345,8 +10345,8 @@ define <vscale x 16 x half> @test_vluxseg2_mask_nxv16f16_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -10737,8 +10737,8 @@ define <vscale x 8 x half> @test_vluxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t
; CHECK-LABEL: test_vluxseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -10750,8 +10750,8 @@ define <vscale x 8 x half> @test_vluxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -11169,8 +11169,8 @@ define <vscale x 8 x half> @test_vluxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t
; CHECK-LABEL: test_vluxseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -11182,8 +11182,8 @@ define <vscale x 8 x half> @test_vluxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16
; CHECK-LABEL: test_vluxseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -12789,8 +12789,8 @@ define <vscale x 4 x float> @test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -12802,8 +12802,8 @@ define <vscale x 4 x float> @test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -12816,8 +12816,8 @@ define <vscale x 4 x float> @test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2
; CHECK-LABEL: test_vluxseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -12829,8 +12829,8 @@ define <vscale x 4 x float> @test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -12897,8 +12897,8 @@ define <vscale x 8 x float> @test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 5)
@@ -12910,8 +12910,8 @@ define <vscale x 8 x float> @test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -12924,8 +12924,8 @@ define <vscale x 8 x float> @test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2
; CHECK-LABEL: test_vluxseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, i64 %vl, i64 5)
@@ -12937,8 +12937,8 @@ define <vscale x 8 x float> @test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv3
; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1.nxv8i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13221,8 +13221,8 @@ define <vscale x 4 x float> @test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -13234,8 +13234,8 @@ define <vscale x 4 x float> @test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13248,8 +13248,8 @@ define <vscale x 4 x float> @test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3
; CHECK-LABEL: test_vluxseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -13261,8 +13261,8 @@ define <vscale x 4 x float> @test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13545,8 +13545,8 @@ define <vscale x 4 x float> @test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 5)
@@ -13558,8 +13558,8 @@ define <vscale x 4 x float> @test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -13572,8 +13572,8 @@ define <vscale x 4 x float> @test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4
; CHECK-LABEL: test_vluxseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 5)
@@ -13585,8 +13585,8 @@ define <vscale x 4 x float> @test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv1
; CHECK-LABEL: test_vluxseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
@@ -14625,8 +14625,8 @@ define <vscale x 2 x double> @test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -14638,8 +14638,8 @@ define <vscale x 2 x double> @test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14652,8 +14652,8 @@ define <vscale x 2 x double> @test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -14665,8 +14665,8 @@ define <vscale x 2 x double> @test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14679,8 +14679,8 @@ define <vscale x 2 x double> @test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -14692,8 +14692,8 @@ define <vscale x 2 x double> @test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14733,8 +14733,8 @@ define <vscale x 4 x double> @test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, i64 %vl, i64 6)
@@ -14746,8 +14746,8 @@ define <vscale x 4 x double> @test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14760,8 +14760,8 @@ define <vscale x 4 x double> @test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, i64 %vl, i64 6)
@@ -14773,8 +14773,8 @@ define <vscale x 4 x double> @test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei16.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14787,8 +14787,8 @@ define <vscale x 4 x double> @test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_
; CHECK-LABEL: test_vluxseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, i64 %vl, i64 6)
@@ -14800,8 +14800,8 @@ define <vscale x 4 x double> @test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei32.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1.nxv4i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14949,8 +14949,8 @@ define <vscale x 2 x double> @test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -14962,8 +14962,8 @@ define <vscale x 2 x double> @test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -14976,8 +14976,8 @@ define <vscale x 2 x double> @test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -14989,8 +14989,8 @@ define <vscale x 2 x double> @test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15003,8 +15003,8 @@ define <vscale x 2 x double> @test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -15016,8 +15016,8 @@ define <vscale x 2 x double> @test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15165,8 +15165,8 @@ define <vscale x 2 x double> @test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, i64 %vl, i64 6)
@@ -15178,8 +15178,8 @@ define <vscale x 2 x double> @test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15192,8 +15192,8 @@ define <vscale x 2 x double> @test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, i64 %vl, i64 6)
@@ -15205,8 +15205,8 @@ define <vscale x 2 x double> @test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei16.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -15219,8 +15219,8 @@ define <vscale x 2 x double> @test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_
; CHECK-LABEL: test_vluxseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, i64 %vl, i64 6)
@@ -15232,8 +15232,8 @@ define <vscale x 2 x double> @test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv
; CHECK-LABEL: test_vluxseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei32.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
@@ -16029,8 +16029,8 @@ define <vscale x 8 x bfloat> @test_vluxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vluxseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -16042,8 +16042,8 @@ define <vscale x 8 x bfloat> @test_vluxseg2_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vluxseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -16137,8 +16137,8 @@ define <vscale x 16 x bfloat> @test_vluxseg2_nxv16bf16_triscv.vector.tuple_nxv32
; CHECK-LABEL: test_vluxseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, i64 %vl, i64 4)
@@ -16150,8 +16150,8 @@ define <vscale x 16 x bfloat> @test_vluxseg2_mask_nxv16bf16_triscv.vector.tuple_
; CHECK-LABEL: test_vluxseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vluxseg2ei8.v v12, (a0), v8, v0.t
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1.nxv16i8(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -16542,8 +16542,8 @@ define <vscale x 8 x bfloat> @test_vluxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vluxseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -16555,8 +16555,8 @@ define <vscale x 8 x bfloat> @test_vluxseg3_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vluxseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg3ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
@@ -16974,8 +16974,8 @@ define <vscale x 8 x bfloat> @test_vluxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8
; CHECK-LABEL: test_vluxseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, i64 %vl, i64 4)
@@ -16987,8 +16987,8 @@ define <vscale x 8 x bfloat> @test_vluxseg4_mask_nxv8bf16_triscv.vector.tuple_nx
; CHECK-LABEL: test_vluxseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vluxseg4ei8.v v10, (a0), v8, v0.t
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv1r.v v14, v8
+; CHECK-NEXT: vluxseg4ei8.v v6, (a0), v14, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
More information about the llvm-commits
mailing list