[llvm] [RISCV] Start vslide1down sequence with a dependency breaking splat (PR #72691)
Philip Reames via llvm-commits
llvm-commits@lists.llvm.org
Fri Nov 17 11:18:44 PST 2023
https://github.com/preames created https://github.com/llvm/llvm-project/pull/72691
If we are using only vslide1downs to initialize an otherwise undef vector, we end up with an implicit_def as the source of the first vslide1down. This register has to be allocated and creates false dependencies with surrounding code.
Instead, start our sequence with a vmv.v.x in the hopes of creating a dependency-breaking idiom. Unfortunately, it's not clear this will actually work: due to the VL=0 special case for the tail-agnostic (TA) policy, the hardware has to work pretty hard to recognize that the vmv.v.x actually has no source dependence. I don't think we can reasonably expect all hardware to have optimized this case, but I also don't see any downside to preferring it.
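To illustrate with a quick sketch (mirroring the test updates below), for a 4-element build_vector of scalars held in a0-a3 we previously emitted:

  vsetivli zero, 4, e32, m1, ta, ma
  vslide1down.vx v8, v8, a0    # v8 is read here while still undefined
  vslide1down.vx v8, v8, a1
  vslide1down.vx v8, v8, a2
  vslide1down.vx v8, v8, a3

and with this change we instead emit:

  vsetivli zero, 4, e32, m1, ta, ma
  vmv.v.x v8, a0               # TA splat; no vector source operand
  vslide1down.vx v8, v8, a1
  vslide1down.vx v8, v8, a2
  vslide1down.vx v8, v8, a3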
From c52fd6bfbac6a3c567866f9a534d87238e22b599 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Fri, 17 Nov 2023 11:14:13 -0800
Subject: [PATCH] [RISCV] Start vslide1down sequence with a dependency breaking
splat
If we are using only vslide1downs to initialize an otherwise
undef vector, we end up with an implicit_def as the source of the
first vslide1down. This register has to be allocated and creates
false dependencies with surrounding code.
Instead, start our sequence with a vmv.v.x in the hopes of creating
a dependency-breaking idiom. Unfortunately, it's not clear this
will actually work: due to the VL=0 special case for the
tail-agnostic (TA) policy, the hardware has to work pretty hard to
recognize that the vmv.v.x actually has no source dependence. I
don't think we can reasonably expect all hardware to have optimized
this case, but I also don't see any downside to preferring it.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +-
.../RISCV/rvv/fixed-vectors-bitcast.ll | 4 +-
.../rvv/fixed-vectors-buildvec-of-binop.ll | 68 +++---
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 12 +-
.../RISCV/rvv/fixed-vectors-fp2i-sat.ll | 66 +++---
.../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 72 +++---
.../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 4 +-
.../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 66 +++---
.../CodeGen/RISCV/rvv/fixed-vectors-load.ll | 4 +-
.../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 222 +++++++++---------
.../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 28 +--
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 14 +-
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 23 +-
.../RISCV/rvv/fixed-vectors-vselect.ll | 24 +-
.../test/CodeGen/RISCV/rvv/fold-vector-cmp.ll | 2 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 4 +-
.../CodeGen/RISCV/urem-seteq-illegal-types.ll | 8 +-
17 files changed, 303 insertions(+), 331 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f89f300a4e9e50c..2c7f2748472a175 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3798,13 +3798,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
- SDValue Vec = DAG.getUNDEF(ContainerVT);
+ SDValue Vec;
UndefCount = 0;
for (SDValue V : Op->ops()) {
if (V.isUndef()) {
UndefCount++;
continue;
}
+
+ // Start our sequence with a TA splat in the hopes that hardware is able to
+ // recognize there's no dependency on the prior value of our temporary
+ // register.
+ if (!Vec) {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ UndefCount = 0;
+ continue;
+ }
+
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
index 47edb9eecb00bc1..bfa4c0adf6eb7a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -495,7 +495,7 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a) {
; RV32ELEN32-LABEL: bitcast_i64_v4i16:
; RV32ELEN32: # %bb.0:
; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT: vmv.v.x v8, a0
; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a1
; RV32ELEN32-NEXT: ret
;
@@ -530,7 +530,7 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a) {
; RV32ELEN32-LABEL: bitcast_i64_v2i32:
; RV32ELEN32: # %bb.0:
; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT: vmv.v.x v8, a0
; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a1
; RV32ELEN32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 2ffca983ac1023f..d0ff85d1179cfdb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -6,7 +6,7 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -30,7 +30,7 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
; CHECK-LABEL: add_constant_rhs_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
@@ -67,7 +67,7 @@ define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: sub_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI2_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -91,7 +91,7 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: mul_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI3_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -115,7 +115,7 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: udiv_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -152,7 +152,7 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -176,7 +176,7 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -200,7 +200,7 @@ define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_splat:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
@@ -226,7 +226,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV32-NEXT: addi a3, a3, 2047
; RV32-NEXT: addi a3, a3, 308
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -239,7 +239,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV64-NEXT: addi a3, a3, 2047
; RV64-NEXT: addiw a3, a3, 308
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
@@ -263,7 +263,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-NEXT: addi a3, a3, 2047
; RV32-NEXT: addi a3, a3, 308
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -276,7 +276,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV64-NEXT: addi a3, a3, 2047
; RV64-NEXT: addiw a3, a3, 308
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
@@ -293,25 +293,15 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_identity2:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, 23
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs_identity2:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 23
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs_identity2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 23
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: ret
%e0 = add i32 %a, 23
%v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
%v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
@@ -324,7 +314,7 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_inverse:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI11_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -348,7 +338,7 @@ define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -377,7 +367,7 @@ define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f
; RV32-NEXT: add a2, a2, a6
; RV32-NEXT: add a3, a3, a7
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -385,12 +375,12 @@ define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f
;
; RV64-LABEL: add_general_rhs:
; RV64: # %bb.0:
-; RV64-NEXT: addw a0, a0, a4
+; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: addw a1, a1, a5
; RV64-NEXT: addw a2, a2, a6
; RV64-NEXT: addw a3, a3, a7
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
@@ -414,7 +404,7 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a3, a3, a4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -422,12 +412,12 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
;
; RV64-LABEL: add_general_splat:
; RV64: # %bb.0:
-; RV64-NEXT: addw a0, a0, a4
+; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: addw a1, a1, a4
; RV64-NEXT: addw a2, a2, a4
; RV64-NEXT: addw a3, a3, a4
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 2d8bae7092242d3..05aa5f9807b9fc4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -252,7 +252,7 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
; CHECK-LABEL: buildvec_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x half> poison, half %a, i64 0
@@ -264,7 +264,7 @@ define <2 x float> @buildvec_v2f32(float %a, float %b) {
; CHECK-LABEL: buildvec_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x float> poison, float %a, i64 0
@@ -276,7 +276,7 @@ define <2 x double> @buildvec_v2f64(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x double> poison, double %a, i64 0
@@ -288,7 +288,7 @@ define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64_b:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x double> poison, double %b, i64 1
@@ -300,7 +300,7 @@ define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: buildvec_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
@@ -316,7 +316,7 @@ define <8 x float> @buildvec_v8f32(float %e0, float %e1, float %e2, float %e3, f
; CHECK-LABEL: buildvec_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index 645b53727a059b9..ed0b15c6add5cd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -211,7 +211,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV32-NEXT: fcvt.w.d a3, fa5, rtz
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: ret
@@ -234,13 +234,13 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: feq.d a2, fa5, fa5
-; RV64-NEXT: neg a2, a2
+; RV64-NEXT: negw a2, a2
; RV64-NEXT: fmax.d fa5, fa5, fa4
; RV64-NEXT: fmin.d fa5, fa5, fa3
; RV64-NEXT: fcvt.l.d a3, fa5, rtz
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: ret
@@ -256,23 +256,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vslidedown.vi v9, v8, 1
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
-; RV32-NEXT: vfmv.f.s fa4, v8
+; RV32-NEXT: vfmv.f.s fa4, v9
; RV32-NEXT: fcvt.d.w fa3, zero
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vslide1down.vx v9, v8, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
-; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: fcvt.wu.d a2, fa5, rtz
+; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT: vmv.v.x v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: ret
;
@@ -280,23 +278,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: lui a0, %hi(.LCPI11_0)
; RV64-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
-; RV64-NEXT: vfmv.f.s fa4, v8
+; RV64-NEXT: vfmv.f.s fa4, v9
; RV64-NEXT: fmv.d.x fa3, zero
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vslide1down.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
-; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-NEXT: fcvt.lu.d a2, fa5, rtz
+; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.x v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: ret
%a = load <2 x double>, ptr %x
@@ -344,7 +340,7 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a2
+; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -437,13 +433,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vfmv.f.s fa3, v8
; RV64-NEXT: feq.d a2, fa3, fa3
-; RV64-NEXT: neg a2, a2
+; RV64-NEXT: negw a2, a2
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a2
+; RV64-NEXT: vmv.v.x v10, a2
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
@@ -529,22 +525,21 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: lui a0, %hi(.LCPI13_0)
; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
-; RV32-NEXT: vfmv.f.s fa4, v8
+; RV32-NEXT: vfmv.f.s fa4, v10
; RV32-NEXT: fcvt.d.w fa3, zero
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v11, v8, 1
-; RV32-NEXT: vfmv.f.s fa4, v11
+; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
-; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
+; RV32-NEXT: fcvt.wu.d a2, fa4, rtz
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -604,22 +599,21 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 1
; RV64-NEXT: lui a0, %hi(.LCPI13_0)
; RV64-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
-; RV64-NEXT: vfmv.f.s fa4, v8
+; RV64-NEXT: vfmv.f.s fa4, v10
; RV64-NEXT: fmv.d.x fa3, zero
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v11, v8, 1
-; RV64-NEXT: vfmv.f.s fa4, v11
+; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
-; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT: fcvt.lu.d a2, fa4, rtz
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a2
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 1952789b4073344..ec11ada12eaa769 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -134,12 +134,12 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
; LMULMAX8RV32-LABEL: si2fp_v3i7_v3f32:
; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: lw a1, 0(a0)
+; LMULMAX8RV32-NEXT: lw a1, 4(a0)
+; LMULMAX8RV32-NEXT: lw a2, 0(a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT: lw a2, 4(a0)
; LMULMAX8RV32-NEXT: lw a0, 8(a0)
+; LMULMAX8RV32-NEXT: vmv.v.x v8, a2
; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV32-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV32-NEXT: vadd.vv v8, v8, v8
@@ -151,12 +151,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX8RV64-LABEL: si2fp_v3i7_v3f32:
; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: ld a1, 0(a0)
+; LMULMAX8RV64-NEXT: ld a1, 8(a0)
+; LMULMAX8RV64-NEXT: ld a2, 0(a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT: ld a2, 8(a0)
; LMULMAX8RV64-NEXT: ld a0, 16(a0)
+; LMULMAX8RV64-NEXT: vmv.v.x v8, a2
; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV64-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV64-NEXT: vadd.vv v8, v8, v8
@@ -168,12 +168,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX1RV32-LABEL: si2fp_v3i7_v3f32:
; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: lw a1, 0(a0)
+; LMULMAX1RV32-NEXT: lw a1, 4(a0)
+; LMULMAX1RV32-NEXT: lw a2, 0(a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT: lw a2, 4(a0)
; LMULMAX1RV32-NEXT: lw a0, 8(a0)
+; LMULMAX1RV32-NEXT: vmv.v.x v8, a2
; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV32-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX1RV32-NEXT: vadd.vv v8, v8, v8
@@ -185,12 +185,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX1RV64-LABEL: si2fp_v3i7_v3f32:
; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: ld a1, 0(a0)
+; LMULMAX1RV64-NEXT: ld a1, 8(a0)
+; LMULMAX1RV64-NEXT: ld a2, 0(a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT: ld a2, 8(a0)
; LMULMAX1RV64-NEXT: ld a0, 16(a0)
+; LMULMAX1RV64-NEXT: vmv.v.x v8, a2
; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV64-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX1RV64-NEXT: vadd.vv v8, v8, v8
@@ -202,12 +202,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX8RV32ZVFHMIN-LABEL: si2fp_v3i7_v3f32:
; LMULMAX8RV32ZVFHMIN: # %bb.0:
-; LMULMAX8RV32ZVFHMIN-NEXT: lw a1, 0(a0)
+; LMULMAX8RV32ZVFHMIN-NEXT: lw a1, 4(a0)
+; LMULMAX8RV32ZVFHMIN-NEXT: lw a2, 0(a0)
; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT: lw a2, 4(a0)
; LMULMAX8RV32ZVFHMIN-NEXT: lw a0, 8(a0)
+; LMULMAX8RV32ZVFHMIN-NEXT: vmv.v.x v8, a2
; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV32ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV32ZVFHMIN-NEXT: vadd.vv v8, v8, v8
@@ -219,12 +219,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX8RV64ZVFHMIN-LABEL: si2fp_v3i7_v3f32:
; LMULMAX8RV64ZVFHMIN: # %bb.0:
-; LMULMAX8RV64ZVFHMIN-NEXT: ld a1, 0(a0)
+; LMULMAX8RV64ZVFHMIN-NEXT: ld a1, 8(a0)
+; LMULMAX8RV64ZVFHMIN-NEXT: ld a2, 0(a0)
; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT: ld a2, 8(a0)
; LMULMAX8RV64ZVFHMIN-NEXT: ld a0, 16(a0)
+; LMULMAX8RV64ZVFHMIN-NEXT: vmv.v.x v8, a2
; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV64ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV64ZVFHMIN-NEXT: vadd.vv v8, v8, v8
@@ -241,12 +241,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
; LMULMAX8RV32-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: lw a1, 0(a0)
+; LMULMAX8RV32-NEXT: lw a1, 4(a0)
+; LMULMAX8RV32-NEXT: lw a2, 0(a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT: lw a2, 4(a0)
; LMULMAX8RV32-NEXT: lw a0, 8(a0)
+; LMULMAX8RV32-NEXT: vmv.v.x v8, a2
; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV32-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV32-NEXT: li a0, 127
@@ -258,12 +258,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX8RV64-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: ld a1, 0(a0)
+; LMULMAX8RV64-NEXT: ld a1, 8(a0)
+; LMULMAX8RV64-NEXT: ld a2, 0(a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT: ld a2, 8(a0)
; LMULMAX8RV64-NEXT: ld a0, 16(a0)
+; LMULMAX8RV64-NEXT: vmv.v.x v8, a2
; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV64-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV64-NEXT: li a0, 127
@@ -275,12 +275,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX1RV32-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: lw a1, 0(a0)
+; LMULMAX1RV32-NEXT: lw a1, 4(a0)
+; LMULMAX1RV32-NEXT: lw a2, 0(a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT: lw a2, 4(a0)
; LMULMAX1RV32-NEXT: lw a0, 8(a0)
+; LMULMAX1RV32-NEXT: vmv.v.x v8, a2
; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV32-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX1RV32-NEXT: li a0, 127
@@ -292,12 +292,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX1RV64-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: ld a1, 0(a0)
+; LMULMAX1RV64-NEXT: ld a1, 8(a0)
+; LMULMAX1RV64-NEXT: ld a2, 0(a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT: ld a2, 8(a0)
; LMULMAX1RV64-NEXT: ld a0, 16(a0)
+; LMULMAX1RV64-NEXT: vmv.v.x v8, a2
; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX1RV64-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX1RV64-NEXT: li a0, 127
@@ -309,12 +309,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX8RV32ZVFHMIN-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX8RV32ZVFHMIN: # %bb.0:
-; LMULMAX8RV32ZVFHMIN-NEXT: lw a1, 0(a0)
+; LMULMAX8RV32ZVFHMIN-NEXT: lw a1, 4(a0)
+; LMULMAX8RV32ZVFHMIN-NEXT: lw a2, 0(a0)
; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT: lw a2, 4(a0)
; LMULMAX8RV32ZVFHMIN-NEXT: lw a0, 8(a0)
+; LMULMAX8RV32ZVFHMIN-NEXT: vmv.v.x v8, a2
; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV32ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV32ZVFHMIN-NEXT: li a0, 127
@@ -326,12 +326,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; LMULMAX8RV64ZVFHMIN-LABEL: ui2fp_v3i7_v3f32:
; LMULMAX8RV64ZVFHMIN: # %bb.0:
-; LMULMAX8RV64ZVFHMIN-NEXT: ld a1, 0(a0)
+; LMULMAX8RV64ZVFHMIN-NEXT: ld a1, 8(a0)
+; LMULMAX8RV64ZVFHMIN-NEXT: ld a2, 0(a0)
; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT: ld a2, 8(a0)
; LMULMAX8RV64ZVFHMIN-NEXT: ld a0, 16(a0)
+; LMULMAX8RV64ZVFHMIN-NEXT: vmv.v.x v8, a2
; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
; LMULMAX8RV64ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX8RV64ZVFHMIN-NEXT: li a0, 127
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 3cc7371c1ce9ac4..1c9aa9813e12f07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -230,7 +230,7 @@ define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) {
; RV32-NEXT: vmv.x.s a4, v9
; RV32-NEXT: vmv.x.s a5, v8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vmv.v.x v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vslide1down.vx v8, v8, a2
@@ -246,7 +246,7 @@ define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) {
; RV64-NEXT: vmv.x.s a1, v9
; RV64-NEXT: vmv.x.s a2, v8
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vslidedown.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index 5d34cd6592702e2..7c5047bbdf63526 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -15,7 +15,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf@plt
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
@@ -51,7 +51,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf@plt
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add a0, sp, a0
@@ -80,16 +80,14 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; RV64-LABEL: llrint_v2i64_v2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
@@ -116,7 +114,7 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf@plt
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
@@ -176,15 +174,13 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
; RV64-LABEL: llrint_v3i64_v3f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v9
; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 2
@@ -224,7 +220,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf@plt
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
@@ -284,15 +280,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; RV64-LABEL: llrint_v4i64_v4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v9
; RV64-NEXT: fcvt.l.s a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 2
@@ -676,7 +670,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint@plt
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
@@ -711,7 +705,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint@plt
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add a0, sp, a0
@@ -739,14 +733,15 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
;
; RV64-LABEL: llrint_v2i64_v2f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v9
; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vslide1down.vx v9, v8, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-NEXT: fcvt.l.d a1, fa5
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
ret <2 x i64> %a
@@ -773,7 +768,7 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint@plt
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
@@ -832,15 +827,14 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
;
; RV64-LABEL: llrint_v4i64_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: vslide1down.vx v10, v8, a0
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v12
+; RV64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-NEXT: vfmv.f.s fa5, v10
; RV64-NEXT: fcvt.l.d a0, fa5
+; RV64-NEXT: vfmv.f.s fa5, v8
+; RV64-NEXT: fcvt.l.d a1, fa5
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index e05b9a699662f0d..caf0ae603fda9c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -97,7 +97,7 @@ define <6 x i1> @load_v6i1(ptr %p) {
; RV32-NEXT: srli a5, a5, 31
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -122,7 +122,7 @@ define <6 x i1> @load_v6i1(ptr %p) {
; RV64-NEXT: srli a5, a5, 63
; RV64-NEXT: andi a0, a0, 1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a5
; RV64-NEXT: vslide1down.vx v8, v8, a4
; RV64-NEXT: vslide1down.vx v8, v8, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index 7cb864546cebcb9..224f5066138cdec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -39,41 +39,41 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; RV32-LABEL: lrint_v2f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v9
; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v9, v8, a0
-; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: fcvt.w.s a1, fa5
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v2f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v9
; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 1
; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i32-NEXT: fcvt.l.s a1, fa5
+; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-i32-NEXT: vmv.v.x v8, a1
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v2f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v9, v8, a0
-; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i64-NEXT: vmv.v.x v8, a1
+; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
; RV64-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
@@ -83,13 +83,14 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
; RV32-LABEL: lrint_v3f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v9, v8, a0
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v9
; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a1, fa5
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vfmv.f.s fa5, v10
@@ -103,13 +104,14 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
;
; RV64-i32-LABEL: lrint_v3f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v9
; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a1, fa5
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vmv.v.x v9, a1
; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
; RV64-i32-NEXT: vfmv.f.s fa5, v10
@@ -124,15 +126,13 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
; RV64-i64-LABEL: lrint_v3f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v9
; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vmv.v.x v10, a1
; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v9, v8, 2
@@ -155,13 +155,14 @@ declare <3 x iXLen> @llvm.lrint.v3iXLen.v3f32(<3 x float>)
define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; RV32-LABEL: lrint_v4f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v9, v8, a0
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v9
; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a1, fa5
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vfmv.f.s fa5, v10
@@ -175,13 +176,14 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
;
; RV64-i32-LABEL: lrint_v4f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
-; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v9
; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a1, fa5
+; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vmv.v.x v9, a1
; RV64-i32-NEXT: vslide1down.vx v9, v9, a0
; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
; RV64-i32-NEXT: vfmv.f.s fa5, v10
@@ -196,15 +198,13 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; RV64-i64-LABEL: lrint_v4f32:
; RV64-i64: # %bb.0:
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v9
; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vmv.v.x v10, a1
; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v9, v8, 2
@@ -227,15 +227,14 @@ declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; RV32-LABEL: lrint_v8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: vslide1down.vx v10, v8, a0
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a1, fa5
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -277,15 +276,14 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
;
; RV64-i32-LABEL: lrint_v8f32:
; RV64-i32: # %bb.0:
-; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: vslide1down.vx v10, v8, a0
; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a1, fa5
; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vmv.v.x v10, a1
; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
@@ -423,43 +421,40 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; RV32-LABEL: lrint_v2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v9
; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v9, v8, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: fcvt.w.d a1, fa5
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: ret
;
; RV64-i32-LABEL: lrint_v2f64:
; RV64-i32: # %bb.0:
; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v9
; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v9, v8, a0
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v8, v8, 1
; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: fcvt.l.d a1, fa5
; RV64-i32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i32-NEXT: vmv.v.x v8, a1
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
; RV64-i32-NEXT: ret
;
; RV64-i64-LABEL: lrint_v2f64:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v9
; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vslide1down.vx v9, v8, a0
-; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-i64-NEXT: fcvt.l.d a1, fa5
+; RV64-i64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-i64-NEXT: vmv.v.x v8, a1
+; RV64-i64-NEXT: vslide1down.vx v8, v8, a0
; RV64-i64-NEXT: ret
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
ret <2 x iXLen> %a
@@ -470,15 +465,13 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; RV32-LABEL: lrint_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v11, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v11
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a1, fa5
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -497,15 +490,13 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; RV64-i32-LABEL: lrint_v4f64:
; RV64-i32: # %bb.0:
; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v10, v8, a0
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v11, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v11
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a1, fa5
; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-i32-NEXT: vmv.v.x v10, a1
; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
@@ -523,15 +514,14 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
;
; RV64-i64-LABEL: lrint_v4f64:
; RV64-i64: # %bb.0:
-; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: vslide1down.vx v10, v8, a0
; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i64-NEXT: vslidedown.vi v12, v8, 1
-; RV64-i64-NEXT: vfmv.f.s fa5, v12
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
; RV64-i64-NEXT: fcvt.l.d a0, fa5
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.d a1, fa5
; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-i64-NEXT: vmv.v.x v10, a1
; RV64-i64-NEXT: vslide1down.vx v10, v10, a0
; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-i64-NEXT: vslidedown.vi v12, v8, 2
@@ -566,15 +556,14 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vse64.v v8, (a0)
-; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 1
-; RV32-NEXT: vfmv.f.s fa5, v12
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a1, fa5
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -620,15 +609,14 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV64-i32-NEXT: mv a0, sp
; RV64-i32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-i32-NEXT: vse64.v v8, (a0)
-; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v10, v8, a0
; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 1
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a1, fa5
; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-i32-NEXT: vmv.v.x v10, a1
; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index d23c494ba37a061..281facd6f6bde8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -55,7 +55,7 @@ define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
; CHECK-LABEL: buildvec_mask_nonconst_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -64,7 +64,7 @@ define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
; ZVE32F-LABEL: buildvec_mask_nonconst_v2i1:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -79,7 +79,7 @@ define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -88,7 +88,7 @@ define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize {
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v2i1:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -175,7 +175,7 @@ define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
@@ -186,7 +186,7 @@ define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v4i1:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
@@ -204,7 +204,7 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) {
; CHECK-LABEL: buildvec_mask_nonconst_v4i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vslide1down.vx v8, v8, a0
@@ -216,7 +216,7 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) {
; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1_2:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT: vmv.v.i v8, 0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; ZVE32F-NEXT: li a0, 1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
@@ -285,7 +285,7 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: li a4, 1
; CHECK-NEXT: vslide1down.vx v8, v8, a4
@@ -301,7 +301,7 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1_2:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; ZVE32F-NEXT: li a4, 1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
@@ -328,7 +328,7 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: li a4, 1
; CHECK-NEXT: vslide1down.vx v8, v8, a4
@@ -344,7 +344,7 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; ZVE32F-NEXT: li a4, 1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
@@ -371,7 +371,7 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
@@ -386,7 +386,7 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 728cf18e1a77d8a..d74fd6cd3f03470 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -6765,13 +6765,12 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: lw a6, 40(a2)
; RV32ZVE32F-NEXT: lw a7, 32(a2)
; RV32ZVE32F-NEXT: lw t0, 24(a2)
-; RV32ZVE32F-NEXT: lw t1, 0(a2)
+; RV32ZVE32F-NEXT: lw t1, 16(a2)
; RV32ZVE32F-NEXT: lw t2, 8(a2)
-; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
+; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
@@ -11934,13 +11933,12 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: lw a5, 40(a2)
; RV32ZVE32F-NEXT: lw a6, 32(a2)
; RV32ZVE32F-NEXT: lw a7, 24(a2)
-; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 16(a2)
; RV32ZVE32F-NEXT: lw t1, 8(a2)
-; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 60b61e889315cfe..ecc81cbaa503db4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1674,18 +1674,17 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a1, 0(a0)
-; RV32ZVE32F-NEXT: lw a0, 8(a0)
+; RV32ZVE32F-NEXT: lw a1, 8(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vslide1down.vx v9, v8, a1
-; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a0
+; RV32ZVE32F-NEXT: vlse32.v v9, (a0), zero
+; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vmv.v.x v8, a0
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v0
@@ -5751,13 +5750,12 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: lw s4, 40(a2)
; RV32ZVE32F-NEXT: lw s5, 32(a2)
; RV32ZVE32F-NEXT: lw s6, 24(a2)
-; RV32ZVE32F-NEXT: lw s7, 0(a2)
+; RV32ZVE32F-NEXT: lw s7, 16(a2)
; RV32ZVE32F-NEXT: lw s8, 8(a2)
-; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
+; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
@@ -10459,13 +10457,12 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: lw a4, 40(a1)
; RV32ZVE32F-NEXT: lw a5, 32(a1)
; RV32ZVE32F-NEXT: lw a6, 24(a1)
-; RV32ZVE32F-NEXT: lw a7, 0(a1)
+; RV32ZVE32F-NEXT: lw a7, 16(a1)
; RV32ZVE32F-NEXT: lw t0, 8(a1)
-; RV32ZVE32F-NEXT: lw a1, 16(a1)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT: vlse32.v v8, (a1), zero
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index d27e7799a38628a..cd47720e38216f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -8,9 +8,9 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: lbu a2, 0(a2)
; CHECK-NEXT: vle32.v v8, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a2
; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a2
; CHECK-NEXT: vslide1down.vx v10, v10, a1
; CHECK-NEXT: srli a1, a2, 2
; CHECK-NEXT: vslide1down.vx v10, v10, a1
@@ -41,9 +41,9 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: lbu a2, 0(a2)
; CHECK-NEXT: vle32.v v8, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a2
; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a2
; CHECK-NEXT: vslide1down.vx v10, v10, a1
; CHECK-NEXT: srli a1, a2, 2
; CHECK-NEXT: vslide1down.vx v10, v10, a1
@@ -76,9 +76,9 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: lbu a1, 0(a1)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a1
; CHECK-NEXT: srli a0, a1, 1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vslide1down.vx v10, v10, a0
; CHECK-NEXT: srli a0, a1, 2
; CHECK-NEXT: vslide1down.vx v10, v10, a0
@@ -112,9 +112,9 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: lbu a2, 0(a2)
; CHECK-NEXT: vle32.v v8, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a2
; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a2
; CHECK-NEXT: vslide1down.vx v10, v10, a1
; CHECK-NEXT: srli a1, a2, 2
; CHECK-NEXT: vslide1down.vx v10, v10, a1
@@ -145,9 +145,9 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: lbu a1, 0(a1)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a1
; CHECK-NEXT: srli a0, a1, 1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vslide1down.vx v10, v10, a0
; CHECK-NEXT: srli a0, a1, 2
; CHECK-NEXT: vslide1down.vx v10, v10, a0
@@ -180,9 +180,9 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: lbu a1, 0(a1)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslide1down.vx v10, v8, a1
; CHECK-NEXT: srli a0, a1, 1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vslide1down.vx v10, v10, a0
; CHECK-NEXT: srli a0, a1, 2
; CHECK-NEXT: vslide1down.vx v10, v10, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
index ca833451233becb..e24b23c9b2d3298 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
@@ -13,7 +13,7 @@ define i32 @test(i32 %call.i) {
; CHECK-V-LABEL: test:
; CHECK-V: # %bb.0:
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-V-NEXT: vmv.v.x v8, a0
; CHECK-V-NEXT: lui a0, 524288
; CHECK-V-NEXT: vslide1down.vx v8, v8, a0
; CHECK-V-NEXT: addi a0, a0, 2
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index d311311175c15e6..122388c1b73ec3e 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -634,7 +634,7 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: li a3, 0
; RV32MV-NEXT: call __moddi3 at plt
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: vmv.v.x v8, a0
; RV32MV-NEXT: vslide1down.vx v8, v8, a1
; RV32MV-NEXT: addi a0, sp, 16
; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
@@ -765,7 +765,7 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64MV-NEXT: mul a4, a4, a5
; RV64MV-NEXT: sub a1, a1, a4
; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: vmv.v.x v8, a1
; RV64MV-NEXT: vslide1down.vx v8, v8, a3
; RV64MV-NEXT: vslide1down.vx v8, v8, a2
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index df30946218dfad8..f629c0d178913c4 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -526,10 +526,10 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-NEXT: slli a1, a1, 10
; RV32MV-NEXT: srli a3, a2, 22
; RV32MV-NEXT: or a1, a3, a1
+; RV32MV-NEXT: srli a3, a2, 11
; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32MV-NEXT: vslide1down.vx v8, v8, a2
-; RV32MV-NEXT: srli a2, a2, 11
-; RV32MV-NEXT: vslide1down.vx v8, v8, a2
+; RV32MV-NEXT: vmv.v.x v8, a2
+; RV32MV-NEXT: vslide1down.vx v8, v8, a3
; RV32MV-NEXT: vslide1down.vx v8, v8, a1
; RV32MV-NEXT: vslidedown.vi v8, v8, 1
; RV32MV-NEXT: li a1, 2047
@@ -586,7 +586,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: or a1, a2, a1
; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: vmv.v.x v8, a1
; RV64MV-NEXT: slli a1, a1, 24
; RV64MV-NEXT: srli a1, a1, 24
; RV64MV-NEXT: srli a2, a1, 11
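
For illustration, here is a minimal IR sketch of the build_vector pattern the
mask-buildvec checks above exercise. The actual test bodies are elided in the
hunks, so the insertelement sequence below is an assumed reconstruction, not a
quote from the test file:

; Hypothetical body for the buildvec_mask_nonconst_v2i1 test shown above.
define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
  %v0 = insertelement <2 x i1> poison, i1 %x, i64 0
  %v1 = insertelement <2 x i1> %v0, i1 %y, i64 1
  ret <2 x i1> %v1
}
; Per the updated CHECK lines: the old code began with
;   vslide1down.vx v8, v8, a0
; whose vector source operand was an undefined (implicit_def) register, while
; the new code begins with
;   vmv.v.x v8, a0
; which has no vector source operand, followed by a single
;   vslide1down.vx v8, v8, a1
; to append the second element.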