[llvm] 9c41083 - [RISCV] Legalize fixed length (insert_subvector undef, X, 0) to a scalable insert.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 17 14:40:42 PST 2022
Author: Craig Topper
Date: 2022-01-17T14:31:30-08:00
New Revision: 9c410838d2bcaa087e867e9f8a353fe455588287
URL: https://github.com/llvm/llvm-project/commit/9c410838d2bcaa087e867e9f8a353fe455588287
DIFF: https://github.com/llvm/llvm-project/commit/9c410838d2bcaa087e867e9f8a353fe455588287.diff
LOG: [RISCV] Legalize fixed length (insert_subvector undef, X, 0) to a scalable insert.
We had been treating this as legal, but the undef would later become an
all-zeros vector, forcing us to re-legalize the insert into a vslideup
with a zero vector.
This patch catches that case and legalizes it directly to a scalable
insert.
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D117377
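For context, here is a minimal IR reproducer of the pattern this patch
targets, based on the insert_v4i32_undef_v2i32_0 test updated in
fixed-vectors-insert-subvector.ll below. Only the load and the
vector.insert call appear verbatim in the diff; the store, ret and the
declare line are inferred from the function signature and the checked
vse32.v, so treat the body as a sketch rather than the exact test file.
With this patch the fixed-length subvector inserted at index 0 of an
undef fixed-length vector lowers to a plain store of the subvector
instead of a vmv.v.i/vslideup pair.

; A <2 x i32> subvector inserted at index 0 of an undef <4 x i32>.
define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
  ; Load the 2-element subvector.
  %sv = load <2 x i32>, <2 x i32>* %svp
  ; (insert_subvector undef, X, 0) shape that is now legalized directly
  ; to a scalable insert rather than re-legalized against a zero vector.
  %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

declare <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)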
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e6d855b9620b2..5dd10a46129a0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4825,7 +4825,7 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
// register size. Therefore we must slide the vector group up the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
- if (OrigIdx == 0 && Vec.isUndef())
+ if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
@@ -4835,6 +4835,10 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), SubVec,
DAG.getConstant(0, DL, XLenVT));
+ if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+ SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+ return DAG.getBitcast(Op.getValueType(), SubVec);
+ }
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index e23012988db4e..063d4f380faf3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -847,16 +847,11 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %z) {
; LMULMAX1-LABEL: truncstore_v16i16_v16i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 8
-; LMULMAX1-NEXT: vse8.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 8
+; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v16i16_v16i8:
@@ -1162,20 +1157,16 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) {
; LMULMAX1-LABEL: truncstore_v8i32_v8i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT: vse8.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
+; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v8i32_v8i8:
@@ -1195,16 +1186,11 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %z) {
; LMULMAX1-LABEL: truncstore_v8i32_v8i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT: vse16.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
+; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v8i32_v8i16:
@@ -1284,32 +1270,28 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) {
; LMULMAX1-LABEL: truncstore_v16i32_v16i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 12, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 8
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 8
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 12
-; LMULMAX1-NEXT: vse8.v v12, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 12
+; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v16i32_v16i8:
@@ -1329,27 +1311,18 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %z) {
; LMULMAX1-LABEL: truncstore_v16i32_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v13, v12
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 4
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v12, (a1)
-; LMULMAX1-NEXT: vse16.v v13, (a0)
+; LMULMAX1-NEXT: vse16.v v10, (a1)
+; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v16i32_v16i16:
@@ -1505,24 +1478,20 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
; LMULMAX1-LABEL: truncstore_v4i64_v4i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
-; LMULMAX1-NEXT: vse8.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
+; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v4i64_v4i8:
@@ -1544,20 +1513,16 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) {
; LMULMAX1-LABEL: truncstore_v4i64_v4i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
-; LMULMAX1-NEXT: vse16.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
+; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v4i64_v4i16:
@@ -1577,16 +1542,11 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %z) {
; LMULMAX1-LABEL: truncstore_v4i64_v4i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
-; LMULMAX1-NEXT: vse32.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
+; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v4i64_v4i32:
@@ -1604,40 +1564,36 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
; LMULMAX1-LABEL: truncstore_v8i64_v8i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse8.v v12, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
+; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v8i64_v8i8:
@@ -1659,32 +1615,28 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) {
; LMULMAX1-LABEL: truncstore_v8i64_v8i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
+; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v8i64_v8i16:
@@ -1704,27 +1656,18 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %z) {
; LMULMAX1-LABEL: truncstore_v8i64_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v13, v12
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 2
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v12, (a1)
-; LMULMAX1-NEXT: vse32.v v13, (a0)
+; LMULMAX1-NEXT: vse32.v v10, (a1)
+; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v8i64_v8i32:
@@ -1742,21 +1685,17 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
; LMULMAX1-LABEL: truncstore_v16i64_v16i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v16, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v16, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, mu
; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
@@ -1813,24 +1752,20 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
; LMULMAX4-LABEL: truncstore_v16i64_v16i8:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v16, v8, 0
+; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v8, v16, 0
+; LMULMAX4-NEXT: vnsrl.wi v12, v16, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX4-NEXT: vmv.v.i v9, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v9, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v10, v12, 0
+; LMULMAX4-NEXT: vnsrl.wi v12, v12, 0
+; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v9, v8, 8
-; LMULMAX4-NEXT: vse8.v v9, (a0)
+; LMULMAX4-NEXT: vslideup.vi v8, v12, 8
+; LMULMAX4-NEXT: vse8.v v8, (a0)
; LMULMAX4-NEXT: ret
%y = trunc <16 x i64> %x to <16 x i8>
store <16 x i8> %y, <16 x i8>* %z
@@ -1841,59 +1776,52 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
; LMULMAX1-LABEL: truncstore_v16i64_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v16, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v17, v16
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v12, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v13, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v14, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v14, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v15, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 6
; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v16, (a1)
-; LMULMAX1-NEXT: vse16.v v17, (a0)
+; LMULMAX1-NEXT: vse16.v v10, (a1)
+; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
@@ -1906,13 +1834,9 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX4-NEXT: vmv.v.i v10, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v10, v8, 0
; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v10, v12, 8
-; LMULMAX4-NEXT: vse16.v v10, (a0)
+; LMULMAX4-NEXT: vslideup.vi v8, v12, 8
+; LMULMAX4-NEXT: vse16.v v8, (a0)
; LMULMAX4-NEXT: ret
%y = trunc <16 x i64> %x to <16 x i16>
store <16 x i16> %y, <16 x i16>* %z
@@ -1923,49 +1847,32 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) {
; LMULMAX1-LABEL: truncstore_v16i64_v16i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v16, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v17, v16
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 2
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v9, v16
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 2
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v10, v16
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v13, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v11, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v14, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v15, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0
+; LMULMAX1-NEXT: vnsrl.wi v12, v14, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v12, v9, 2
; LMULMAX1-NEXT: addi a1, a0, 48
-; LMULMAX1-NEXT: vse32.v v16, (a1)
+; LMULMAX1-NEXT: vse32.v v12, (a1)
; LMULMAX1-NEXT: addi a1, a0, 32
-; LMULMAX1-NEXT: vse32.v v10, (a1)
+; LMULMAX1-NEXT: vse32.v v11, (a1)
; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v9, (a1)
-; LMULMAX1-NEXT: vse32.v v17, (a0)
+; LMULMAX1-NEXT: vse32.v v10, (a1)
+; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
@@ -1973,13 +1880,9 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) {
; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0
; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; LMULMAX4-NEXT: vmv.v.i v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v8, v12, 0
; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v8, v16, 8
-; LMULMAX4-NEXT: vse32.v v8, (a0)
+; LMULMAX4-NEXT: vslideup.vi v12, v16, 8
+; LMULMAX4-NEXT: vse32.v v12, (a0)
; LMULMAX4-NEXT: ret
%y = trunc <16 x i64> %x to <16 x i32>
store <16 x i32> %y, <16 x i32>* %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
index 9eecc6e4858ec..5050af573bfd5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -157,21 +157,16 @@ define void @fpround_v8f32_v8f16(<8 x float>* %x, <8 x half>* %y) {
;
; LMULMAX1-LABEL: fpround_v8f32_v8f16:
; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vle32.v v8, (a0)
-; LMULMAX1-NEXT: addi a0, a0, 16
-; LMULMAX1-NEXT: vle32.v v9, (a0)
+; LMULMAX1-NEXT: vle32.v v9, (a2)
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v10, v8
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v10, 4
-; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
+; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x float>, <8 x float>* %x
%d = fptrunc <8 x float> %a to <8 x half>
@@ -205,29 +200,25 @@ define void @fpround_v8f64_v8f16(<8 x double>* %x, <8 x half>* %y) {
; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v10
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v11
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12
; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v11, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v11, v9
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11
; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rod.f.f.w v9, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
%d = fptrunc <8 x double> %a to <8 x half>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index d0f450fcdcc38..f125dfd67c048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -121,32 +121,24 @@ define <8 x i1> @fp2si_v8f32_v8i1(<8 x float> %x) {
;
; LMULMAX1-LABEL: fp2si_v8f32_v8i1:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vmerge.vim v11, v10, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8
-; LMULMAX1-NEXT: vand.vi v8, v12, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v8
+; LMULMAX1-NEXT: vand.vi v8, v10, 1
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v11, v12, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v11, 0
-; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v9
-; LMULMAX1-NEXT: vand.vi v9, v11, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9
+; LMULMAX1-NEXT: vand.vi v9, v10, 1
; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptosi <8 x float> %x to <8 x i1>
ret <8 x i1> %z
@@ -163,32 +155,24 @@ define <8 x i1> @fp2ui_v8f32_v8i1(<8 x float> %x) {
;
; LMULMAX1-LABEL: fp2ui_v8f32_v8i1:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vmerge.vim v11, v10, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8
-; LMULMAX1-NEXT: vand.vi v8, v12, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; LMULMAX1-NEXT: vand.vi v8, v10, 1
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v11, v12, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v11, 0
-; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v9
-; LMULMAX1-NEXT: vand.vi v9, v11, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9
+; LMULMAX1-NEXT: vand.vi v9, v10, 1
; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptoui <8 x float> %x to <8 x i1>
ret <8 x i1> %z
@@ -448,26 +432,22 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v11
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v9
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v9, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -475,8 +455,8 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse8.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse8.v v10, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
%d = fptosi <8 x double> %a to <8 x i8>
@@ -514,26 +494,22 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v11
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v9
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v9, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -541,8 +517,8 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse8.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse8.v v10, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
%d = fptoui <8 x double> %a to <8 x i8>
@@ -561,54 +537,46 @@ define <8 x i1> @fp2si_v8f64_v8i1(<8 x double> %x) {
;
; LMULMAX1-LABEL: fp2si_v8f64_v8i1:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v14, v8
-; LMULMAX1-NEXT: vand.vi v8, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8
+; LMULMAX1-NEXT: vand.vi v8, v12, 1
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v14, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v14, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v14, v9
-; LMULMAX1-NEXT: vand.vi v9, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v9
+; LMULMAX1-NEXT: vand.vi v9, v13, 1
; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v9, 2
+; LMULMAX1-NEXT: vslideup.vi v12, v13, 2
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v10
; LMULMAX1-NEXT: vand.vi v10, v13, 1
; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v10, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v10, 4
+; LMULMAX1-NEXT: vslideup.vi v12, v10, 4
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v11
; LMULMAX1-NEXT: vand.vi v10, v10, 1
; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptosi <8 x double> %x to <8 x i1>
ret <8 x i1> %z
@@ -625,54 +593,46 @@ define <8 x i1> @fp2ui_v8f64_v8i1(<8 x double> %x) {
;
; LMULMAX1-LABEL: fp2ui_v8f64_v8i1:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v14, v8
-; LMULMAX1-NEXT: vand.vi v8, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; LMULMAX1-NEXT: vand.vi v8, v12, 1
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v14, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v14, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v14, v9
-; LMULMAX1-NEXT: vand.vi v9, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v9
+; LMULMAX1-NEXT: vand.vi v9, v13, 1
; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v9, 2
+; LMULMAX1-NEXT: vslideup.vi v12, v13, 2
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v10
; LMULMAX1-NEXT: vand.vi v10, v13, 1
; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v10, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v10, 4
+; LMULMAX1-NEXT: vslideup.vi v12, v10, 4
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11
; LMULMAX1-NEXT: vand.vi v10, v10, 1
; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptoui <8 x double> %x to <8 x i1>
ret <8 x i1> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 820c955016db6..8d496e40a466a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -478,29 +478,25 @@ define void @si2fp_v8i64_v8f16(<8 x i64>* %x, <8 x half>* %y) {
; LMULMAX1-NEXT: vfncvt.f.x.w v12, v10
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.x.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.f.x.w v12, v11
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12
; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v11, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.x.w v10, v9
+; LMULMAX1-NEXT: vfncvt.f.x.w v11, v9
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11
; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.f.x.w v9, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %x
%d = sitofp <8 x i64> %a to <8 x half>
@@ -534,29 +530,25 @@ define void @ui2fp_v8i64_v8f16(<8 x i64>* %x, <8 x half>* %y) {
; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v10
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v11
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12
; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v11, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v9
+; LMULMAX1-NEXT: vfncvt.f.xu.w v11, v9
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11
; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vfncvt.f.xu.w v9, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %x
%d = uitofp <8 x i64> %a to <8 x half>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index b1bf02d22a38a..01a8870ab2216 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -152,11 +152,7 @@ define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; CHECK-NEXT: vslideup.vi v9, v8, 0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index ed7da45c78b5b..79195533f43c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -43,28 +43,24 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, mu
-; RV32-NEXT: vslideup.vi v10, v8, 0
; RV32-NEXT: lw a3, 16(a0)
; RV32-NEXT: addi a4, a0, 20
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: vlse32.v v8, (a4), zero
+; RV32-NEXT: vlse32.v v10, (a4), zero
; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; RV32-NEXT: vmv.s.x v8, a3
+; RV32-NEXT: vmv.s.x v10, a3
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
-; RV32-NEXT: vslideup.vi v10, v8, 2
+; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, mu
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vslide1up.vx v12, v8, a2
-; RV32-NEXT: vslide1up.vx v8, v12, a1
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vslide1up.vx v12, v10, a2
+; RV32-NEXT: vslide1up.vx v10, v12, a1
; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, mu
-; RV32-NEXT: vslideup.vi v10, v8, 2
+; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: sw a1, 16(a0)
; RV32-NEXT: sw a2, 20(a0)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vse64.v v10, (a0)
+; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v3i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index c71cabbf5c0bd..89dd2bded9402 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -213,17 +213,13 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT: vse8.v v10, (a1)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
+; LMULMAX1-NEXT: vse8.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i32>, <8 x i32>* %x
%b = trunc <8 x i32> %a to <8 x i8>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 615a27cdb71ae..4af789ee9963d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2173,28 +2173,24 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
;
; RV64-LABEL: mgather_baseidx_v32i8:
; RV64: # %bb.0:
-; RV64-NEXT: vmv1r.v v12, v0
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT: vsext.vf8 v16, v8
+; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64-NEXT: vmv1r.v v12, v10
+; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu
-; RV64-NEXT: vslidedown.vi v14, v10, 16
-; RV64-NEXT: vslidedown.vi v16, v8, 16
+; RV64-NEXT: vslidedown.vi v10, v10, 16
+; RV64-NEXT: vslidedown.vi v8, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf8 v24, v16
+; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf8 v16, v8
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vsetivli zero, 16, e8, m2, tu, mu
-; RV64-NEXT: vslideup.vi v8, v10, 0
; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, mu
-; RV64-NEXT: vslideup.vi v8, v14, 16
+; RV64-NEXT: vslideup.vi v12, v10, 16
+; RV64-NEXT: vmv2r.v v8, v12
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)