[llvm] 792631c - [RISCV] Use NoV0 register classes for masked `VPseudoBinaryM` (#175706)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 15 22:46:31 PST 2026


Author: Pengcheng Wang
Date: 2026-01-16T14:46:25+08:00
New Revision: 792631c1fa5dc6ef2b306d2d3cf64b6cdf39146c

URL: https://github.com/llvm/llvm-project/commit/792631c1fa5dc6ef2b306d2d3cf64b6cdf39146c
DIFF: https://github.com/llvm/llvm-project/commit/792631c1fa5dc6ef2b306d2d3cf64b6cdf39146c.diff

LOG: [RISCV] Use NoV0 register classes for masked `VPseudoBinaryM` (#175706)

There are two constraints:

1. A single register can't be read with two different EEWs in one
instruction. `V0` is already read as the mask register, so the other
vector source operands can't be `V0` (see the sketch below).
2. The destination and source registers can't overlap. We have already
added the `@earlyclobber` constraint, so we won't allocate `V0` to the
destination.
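
For illustration only (this sketch is not part of the commit, and the
register numbers are hypothetical), constraint 1 rules out an allocation
like the first `vmseq.vv` below; with the source operands drawn from the
NoV0 classes, the register allocator can no longer produce it:

    # v0 holds the mask and is read at EEW=1 through v0.t.
    vmseq.vv v2, v0, v4, v0.t    # illegal: v0 would also be read at
                                 # EEW=SEW, giving one register two EEWs
    vmseq.vv v2, v8, v4, v0.t    # OK: vector sources avoid v0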

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
    llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
    llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
    llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
    llvm/test/CodeGen/RISCV/rvv/pr165232.ll
    llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
    llvm/test/CodeGen/RISCV/rvv/round-vp.ll
    llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
    llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
    llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
    llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
    llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
    llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
    llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
    llvm/test/CodeGen/RISCV/rvv/vmsge.ll
    llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
    llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 566cf632f7745..e5d3b51f5e612 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2596,7 +2596,11 @@ multiclass VPseudoBinaryM<DAGOperand Op2Class, LMULInfo m, bit Commutable = 0> {
                           !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""),
                           TargetConstraintType = 2>;
     def "_" # m.MX # "_MASK" :
-      VPseudoBinaryMOutMask<VR, m.vrclass, Op2Class,
+      VPseudoBinaryMOutMask<!if(!ge(m.octuple, 16), VRNoV0, VR),
+                            GetVRegNoV0<m.vrclass>.R,
+                            !if(!isa<VReg>(Op2Class),
+                                GetVRegNoV0<!cast<VReg>(Op2Class)>.R,
+                                Op2Class),
                             !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""),
                             TargetConstraintType = 2>,
       RISCVMaskedPseudo<MaskIdx=3>;

diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 0770935b749d9..52331a0952f70 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -807,6 +807,8 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                              const MachineFunction &) const {
   if (RC == &RISCV::VMV0RegClass)
     return &RISCV::VRRegClass;
+  if (RC == &RISCV::VMNoV0RegClass)
+    return &RISCV::VRRegClass;
   if (RC == &RISCV::VRNoV0RegClass)
     return &RISCV::VRRegClass;
   if (RC == &RISCV::VRM2NoV0RegClass)

diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index 6c7709f52e30b..00fef2e6304ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -120,24 +120,24 @@ define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vs
 ; CHECK-LABEL: vp_ceil_vv_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
@@ -172,24 +172,24 @@ define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vs
 ; CHECK-LABEL: vp_ceil_vv_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -224,24 +224,24 @@ define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va,
 ; CHECK-LABEL: vp_ceil_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -585,24 +585,24 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -653,44 +653,44 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFH-LABEL: vp_ceil_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -741,44 +741,44 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; ZVFH-LABEL: vp_ceil_vv_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -829,21 +829,21 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFH-LABEL: vp_ceil_vv_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16:
@@ -1064,20 +1064,20 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vp_ceil_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1106,20 +1106,20 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vp_ceil_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1148,20 +1148,20 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs
 ; CHECK-LABEL: vp_ceil_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1264,40 +1264,40 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; RV32ZFH-LABEL: vp_ceil_vv_nxv2f64:
 ; RV32ZFH:       # %bb.0:
 ; RV32ZFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZFH-NEXT:    vmv1r.v v10, v0
+; RV32ZFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZFH-NEXT:    fsrmi a0, 3
-; RV32ZFH-NEXT:    vmv1r.v v0, v10
+; RV32ZFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZFH-NEXT:    fsrm a0
-; RV32ZFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZFH-NEXT:    ret
 ;
 ; RV64ZFH-LABEL: vp_ceil_vv_nxv2f64:
 ; RV64ZFH:       # %bb.0:
 ; RV64ZFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZFH-NEXT:    vmv1r.v v10, v0
-; RV64ZFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZFH-NEXT:    vmv1r.v v12, v0
+; RV64ZFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZFH-NEXT:    li a0, 1075
 ; RV64ZFH-NEXT:    slli a0, a0, 52
 ; RV64ZFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZFH-NEXT:    fsrmi a0, 3
-; RV64ZFH-NEXT:    vmv1r.v v0, v10
+; RV64ZFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZFH-NEXT:    fsrm a0
-; RV64ZFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZFH-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1342,40 +1342,40 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc
 ; RV32ZFH-LABEL: vp_ceil_vv_nxv4f64:
 ; RV32ZFH:       # %bb.0:
 ; RV32ZFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZFH-NEXT:    vmv1r.v v12, v0
+; RV32ZFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZFH-NEXT:    fsrmi a0, 3
-; RV32ZFH-NEXT:    vmv1r.v v0, v12
+; RV32ZFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZFH-NEXT:    fsrm a0
-; RV32ZFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZFH-NEXT:    ret
 ;
 ; RV64ZFH-LABEL: vp_ceil_vv_nxv4f64:
 ; RV64ZFH:       # %bb.0:
 ; RV64ZFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZFH-NEXT:    vmv1r.v v12, v0
-; RV64ZFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZFH-NEXT:    vmv1r.v v16, v0
+; RV64ZFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZFH-NEXT:    li a0, 1075
 ; RV64ZFH-NEXT:    slli a0, a0, 52
 ; RV64ZFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZFH-NEXT:    fsrmi a0, 3
-; RV64ZFH-NEXT:    vmv1r.v v0, v12
+; RV64ZFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZFH-NEXT:    fsrm a0
-; RV64ZFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZFH-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1420,40 +1420,40 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc
 ; RV32ZFH-LABEL: vp_ceil_vv_nxv7f64:
 ; RV32ZFH:       # %bb.0:
 ; RV32ZFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZFH-NEXT:    vmv1r.v v16, v0
+; RV32ZFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZFH-NEXT:    fsrmi a0, 3
-; RV32ZFH-NEXT:    vmv1r.v v0, v16
+; RV32ZFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZFH-NEXT:    fsrm a0
-; RV32ZFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZFH-NEXT:    ret
 ;
 ; RV64ZFH-LABEL: vp_ceil_vv_nxv7f64:
 ; RV64ZFH:       # %bb.0:
 ; RV64ZFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZFH-NEXT:    vmv1r.v v16, v0
-; RV64ZFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZFH-NEXT:    vmv1r.v v24, v0
+; RV64ZFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZFH-NEXT:    li a0, 1075
 ; RV64ZFH-NEXT:    slli a0, a0, 52
 ; RV64ZFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZFH-NEXT:    fsrmi a0, 3
-; RV64ZFH-NEXT:    vmv1r.v v0, v16
+; RV64ZFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZFH-NEXT:    fsrm a0
-; RV64ZFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZFH-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1498,40 +1498,40 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc
 ; RV32ZFH-LABEL: vp_ceil_vv_nxv8f64:
 ; RV32ZFH:       # %bb.0:
 ; RV32ZFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZFH-NEXT:    vmv1r.v v16, v0
+; RV32ZFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZFH-NEXT:    fsrmi a0, 3
-; RV32ZFH-NEXT:    vmv1r.v v0, v16
+; RV32ZFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZFH-NEXT:    fsrm a0
-; RV32ZFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZFH-NEXT:    ret
 ;
 ; RV64ZFH-LABEL: vp_ceil_vv_nxv8f64:
 ; RV64ZFH:       # %bb.0:
 ; RV64ZFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZFH-NEXT:    vmv1r.v v16, v0
-; RV64ZFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZFH-NEXT:    vmv1r.v v24, v0
+; RV64ZFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZFH-NEXT:    li a0, 1075
 ; RV64ZFH-NEXT:    slli a0, a0, 52
 ; RV64ZFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZFH-NEXT:    fsrmi a0, 3
-; RV64ZFH-NEXT:    vmv1r.v v0, v16
+; RV64ZFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZFH-NEXT:    fsrm a0
-; RV64ZFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZFH-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 466d5d4b8e80a..4452e161ad593 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -202,24 +202,24 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ; ZVFHMIN-LABEL: vp_ceil_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -270,44 +270,44 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 ; ZVFH-LABEL: vp_ceil_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -438,20 +438,20 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl
 ; CHECK-LABEL: vp_ceil_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -480,20 +480,20 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_ceil_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -662,79 +662,79 @@ define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e
 ; RV32ZVFH-LABEL: vp_ceil_v4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 3
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_ceil_v4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 3
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_ceil_v4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_ceil_v4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -810,79 +810,79 @@ define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e
 ; RV32ZVFH-LABEL: vp_ceil_v8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 3
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_ceil_v8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 3
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_ceil_v8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_ceil_v8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -958,79 +958,79 @@ define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex
 ; RV32ZVFH-LABEL: vp_ceil_v15f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 3
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_ceil_v15f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 3
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_ceil_v15f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_ceil_v15f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -1106,79 +1106,79 @@ define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex
 ; RV32ZVFH-LABEL: vp_ceil_v16f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 3
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_ceil_v16f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 3
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_ceil_v16f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_ceil_v16f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 3
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index 76f5f0a32bd1c..69a136d6e9358 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -202,24 +202,24 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ; ZVFHMIN-LABEL: vp_floor_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -270,44 +270,44 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vp_floor_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -438,20 +438,20 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev
 ; CHECK-LABEL: vp_floor_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -480,20 +480,20 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_floor_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -662,79 +662,79 @@ define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; RV32ZVFH-LABEL: vp_floor_v4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_v4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_v4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_v4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -810,79 +810,79 @@ define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %
 ; RV32ZVFH-LABEL: vp_floor_v8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_v8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_v8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_v8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -958,79 +958,79 @@ define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe
 ; RV32ZVFH-LABEL: vp_floor_v15f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_v15f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_v15f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_v15f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -1106,79 +1106,79 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe
 ; RV32ZVFH-LABEL: vp_floor_v16f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_v16f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_v16f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_v16f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
index da6e2fae93687..29797f616777b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
@@ -163,13 +163,13 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v11, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v11
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -233,13 +233,13 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v13, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -506,32 +506,17 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb,
 define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -558,25 +543,25 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -587,29 +572,16 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:  .LBB24_2:
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v5
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v5
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a0, a2, -16
 ; CHECK-NEXT:    sltu a1, a2, a0
@@ -617,36 +589,30 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a2, 24
-; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vmfeq.vv v6, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v24, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
index e179970199171..8c18675c2162b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
@@ -163,13 +163,13 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v11, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v11
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -233,13 +233,13 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v13, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -506,32 +506,17 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb,
 define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -558,25 +543,25 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -587,29 +572,16 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:  .LBB24_2:
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v5
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v5
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a0, a2, -16
 ; CHECK-NEXT:    sltu a1, a2, a0
@@ -617,36 +589,30 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a2, 24
-; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vmfeq.vv v6, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v24, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
index 53dbbedc9a055..b46a0b0e5f55b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
@@ -70,9 +70,9 @@ define <4 x i1> @vfptosi_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptosi_v4i1_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i1> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
index 55fccb5db1aee..db1c03dc44dcd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
@@ -70,9 +70,9 @@ define <4 x i1> @vfptoui_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptoui_v4i1_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i1> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index 57c94830fc606..10ef72a3e3f9d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -134,20 +134,20 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 ; CHECK-LABEL: vp_nearbyint_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    li a0, 25
 ; CHECK-NEXT:    slli a0, a0, 10
 ; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
@@ -258,19 +258,19 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_nearbyint_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
@@ -300,19 +300,19 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer
 ; CHECK-LABEL: vp_nearbyint_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
@@ -416,39 +416,39 @@ define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe
 ; RV32-LABEL: vp_nearbyint_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    vmv1r.v v12, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32-NEXT:    vfabs.v v12, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32-NEXT:    frflags a0
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    fsflags a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_nearbyint_v4f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vmv1r.v v10, v0
-; RV64-NEXT:    vfabs.v v12, v8, v0.t
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64-NEXT:    frflags a0
-; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vmv1r.v v0, v12
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    fsflags a0
 ; RV64-NEXT:    ret
   %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -494,39 +494,39 @@ define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe
 ; RV32-LABEL: vp_nearbyint_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    vmv1r.v v16, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32-NEXT:    frflags a0
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    fsflags a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_nearbyint_v8f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vmv1r.v v12, v0
-; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64-NEXT:    frflags a0
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    fsflags a0
 ; RV64-NEXT:    ret
   %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -572,39 +572,39 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 z
 ; RV32-LABEL: vp_nearbyint_v15f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    vmv1r.v v24, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32-NEXT:    frflags a0
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    fsflags a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_nearbyint_v15f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v16, v0
-; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64-NEXT:    frflags a0
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    fsflags a0
 ; RV64-NEXT:    ret
   %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -650,39 +650,39 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 z
 ; RV32-LABEL: vp_nearbyint_v16f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    vmv1r.v v24, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32-NEXT:    frflags a0
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    fsflags a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_nearbyint_v16f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v16, v0
-; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64-NEXT:    frflags a0
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    fsflags a0
 ; RV64-NEXT:    ret
   %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index 7540495c0d3b5..1d9ef739255ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -122,19 +122,19 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-LABEL: vp_rint_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    li a0, 25
 ; CHECK-NEXT:    slli a0, a0, 10
 ; CHECK-NEXT:    fmv.h.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -234,18 +234,18 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl
 ; CHECK-LABEL: vp_rint_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -272,18 +272,18 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_rint_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -376,36 +376,36 @@ define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: vp_rint_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    vmv1r.v v12, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32-NEXT:    vfabs.v v12, v8, v0.t
+; RV32-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_rint_v4f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vmv1r.v v10, v0
-; RV64-NEXT:    vfabs.v v12, v8, v0.t
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; RV64-NEXT:    vmv1r.v v0, v12
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -446,36 +446,36 @@ define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: vp_rint_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    vmv1r.v v16, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32-NEXT:    vfabs.v v16, v8, v0.t
+; RV32-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_rint_v8f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vmv1r.v v12, v0
-; RV64-NEXT:    vfabs.v v16, v8, v0.t
+; RV64-NEXT:    vmv1r.v v16, v0
+; RV64-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -516,36 +516,36 @@ define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex
 ; RV32-LABEL: vp_rint_v15f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    vmv1r.v v24, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_rint_v15f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v16, v0
-; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -586,36 +586,36 @@ define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex
 ; RV32-LABEL: vp_rint_v16f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    vmv1r.v v24, v0
 ; RV32-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32-NEXT:    vfabs.v v24, v8, v0.t
+; RV32-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_rint_v16f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v16, v0
-; RV64-NEXT:    vfabs.v v24, v8, v0.t
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64-NEXT:    li a0, 1075
 ; RV64-NEXT:    slli a0, a0, 52
 ; RV64-NEXT:    fmv.d.x fa5, a0
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index de5427f329496..ebac7656be29a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -202,24 +202,24 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ; ZVFHMIN-LABEL: vp_round_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -270,44 +270,44 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vp_round_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -438,20 +438,20 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev
 ; CHECK-LABEL: vp_round_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -480,20 +480,20 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_round_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -662,79 +662,79 @@ define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; RV32ZVFH-LABEL: vp_round_v4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_v4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_v4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_v4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -810,79 +810,79 @@ define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %
 ; RV32ZVFH-LABEL: vp_round_v8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_v8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_v8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_v8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -958,79 +958,79 @@ define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe
 ; RV32ZVFH-LABEL: vp_round_v15f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_v15f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_v15f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_v15f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -1106,79 +1106,79 @@ define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe
 ; RV32ZVFH-LABEL: vp_round_v16f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_v16f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_v16f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_v16f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index 1c923e3f12171..43566185a200b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -202,24 +202,24 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
 ; ZVFHMIN-LABEL: vp_roundeven_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -270,44 +270,44 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 ; ZVFH-LABEL: vp_roundeven_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -438,20 +438,20 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_roundeven_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -480,20 +480,20 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer
 ; CHECK-LABEL: vp_roundeven_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -662,79 +662,79 @@ define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe
 ; RV32ZVFH-LABEL: vp_roundeven_v4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_v4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_v4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_v4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -810,79 +810,79 @@ define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe
 ; RV32ZVFH-LABEL: vp_roundeven_v8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_v8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_v8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_v8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -958,79 +958,79 @@ define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 z
 ; RV32ZVFH-LABEL: vp_roundeven_v15f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_v15f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_v15f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_v15f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -1106,79 +1106,79 @@ define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 z
 ; RV32ZVFH-LABEL: vp_roundeven_v16f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_v16f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_v16f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_v16f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index 83cbd2b760341..7cae17e8e1ec7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -202,24 +202,24 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext
 ; ZVFHMIN-LABEL: vp_roundtozero_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -270,44 +270,44 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer
 ; ZVFH-LABEL: vp_roundtozero_v16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -438,20 +438,20 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: vp_roundtozero_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -480,20 +480,20 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z
 ; CHECK-LABEL: vp_roundtozero_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -662,79 +662,79 @@ define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zer
 ; RV32ZVFH-LABEL: vp_roundtozero_v4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_v4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_v4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI18_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_v4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -810,79 +810,79 @@ define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zer
 ; RV32ZVFH-LABEL: vp_roundtozero_v8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_v8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_v8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_v8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -958,79 +958,79 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32
 ; RV32ZVFH-LABEL: vp_roundtozero_v15f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_v15f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_v15f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI22_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_v15f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -1106,79 +1106,79 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32
 ; RV32ZVFH-LABEL: vp_roundtozero_v16f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_v16f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_v16f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_v16f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
index af3e9db9fe123..db2cbc82a017b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -62,12 +62,12 @@ define <8 x i1> @fcmp_oeq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -86,12 +86,12 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -130,12 +130,12 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -154,12 +154,12 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -198,12 +198,12 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -222,12 +222,12 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -266,12 +266,12 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -290,12 +290,12 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -334,12 +334,12 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -358,12 +358,12 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -927,12 +927,12 @@ define <8 x i1> @fcmp_une_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v12, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -951,12 +951,12 @@ define <8 x i1> @fcmp_une_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v12, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index efc0f7ef4a441..5777d76072d80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -880,10 +880,10 @@ define <8 x i1> @icmp_uge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_uge_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vv v12, v10, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1059,10 +1059,10 @@ define <8 x i1> @icmp_sge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sge_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vv v12, v10, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1192,10 +1192,10 @@ define <8 x i1> @icmp_sle_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sle_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vv v12, v10, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1353,10 +1353,10 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmseq.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmseq.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1382,10 +1382,10 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmseq.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmseq.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1444,10 +1444,10 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsne.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsne.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1473,10 +1473,10 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsne.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsne.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1535,10 +1535,10 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1564,10 +1564,10 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1626,10 +1626,10 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsleu.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1637,10 +1637,10 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_uge_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsleu.vv v16, v12, v8, v0.t
+; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1657,10 +1657,10 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsleu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsleu.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1719,10 +1719,10 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1748,10 +1748,10 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1810,10 +1810,10 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1839,10 +1839,10 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1901,10 +1901,10 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1912,10 +1912,10 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_sge_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1932,10 +1932,10 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1994,10 +1994,10 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2023,10 +2023,10 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2085,10 +2085,10 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v16, v8, v12, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2114,10 +2114,10 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2125,10 +2125,10 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_sle_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; RV64-NEXT:    vmv1r.v v0, v16
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
index 9eb92f45bb149..0a8c11f0f8813 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
@@ -86,10 +86,10 @@ define <8 x i1> @isnan_v8f32(<8 x float> %x,  <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: isnan_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfclass.v v10, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 512
-; CHECK-NEXT:    vmseq.vx v8, v10, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl)
   ret <8 x i1> %1
@@ -111,10 +111,10 @@ define <16 x i1> @isnan_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isnan_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfclass.v v12, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 256
-; CHECK-NEXT:    vmseq.vx v8, v12, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl)
   ret <16 x i1> %1
@@ -162,10 +162,10 @@ define <4 x i1> @isposinf_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isposinf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfclass.v v10, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vmseq.vx v8, v10, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf"
   ret <4 x i1> %1
@@ -187,9 +187,9 @@ define <8 x i1> @isneginf_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isneginf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfclass.v v12, v8, v0.t
-; CHECK-NEXT:    vmseq.vi v8, v12, 1, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
+; CHECK-NEXT:    vmseq.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf"
   ret <8 x i1> %1
@@ -212,9 +212,9 @@ define <16 x i1> @isfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 126
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v16, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite"
   ret <16 x i1> %1
@@ -239,9 +239,9 @@ define <16 x i1> @isposfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 112
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v16, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 448, <16 x i1> %m, i32 %evl) ; 0x1c0 = "+finite"
   ret <16 x i1> %1
@@ -265,9 +265,9 @@ define <16 x i1> @isnotfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 897
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v16, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan"
   ret <16 x i1> %1

diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index e2deefa26ecb3..a107cb82964eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -120,24 +120,24 @@ define <vscale x 4 x bfloat> @vp_floor_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-LABEL: vp_floor_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
@@ -172,24 +172,24 @@ define <vscale x 8 x bfloat> @vp_floor_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-LABEL: vp_floor_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -224,24 +224,24 @@ define <vscale x 16 x bfloat> @vp_floor_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-LABEL: vp_floor_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -585,24 +585,24 @@ define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-LABEL: vp_floor_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -653,44 +653,44 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: vp_floor_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -741,44 +741,44 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH-LABEL: vp_floor_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -829,21 +829,21 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-LABEL: vp_floor_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv32f16:
@@ -1064,20 +1064,20 @@ define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: vp_floor_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1106,20 +1106,20 @@ define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: vp_floor_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1148,20 +1148,20 @@ define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: vp_floor_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1330,79 +1330,79 @@ define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_floor_nxv2f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_nxv2f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_nxv2f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_nxv2f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1478,79 +1478,79 @@ define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_floor_nxv4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_nxv4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_nxv4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_nxv4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1626,79 +1626,79 @@ define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_floor_nxv7f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_nxv7f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_nxv7f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_nxv7f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1774,79 +1774,79 @@ define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_floor_nxv8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 2
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_floor_nxv8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 2
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_floor_nxv8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_floor_nxv8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 2
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index 0e0c92b150d33..c432204b078f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -110,13 +110,13 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v14, v14, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v16, v14, v12, v0
+; CHECK-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v14, v0
+; CHECK-NEXT:    vmfeq.vv v11, v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmax.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vfmax.vv v12, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -155,13 +155,13 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v20, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v24, v20, v16, v0
+; CHECK-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v20, v0
+; CHECK-NEXT:    vmfeq.vv v13, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfmax.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -193,12 +193,6 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
@@ -207,23 +201,14 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    vmfeq.vv v6, v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -261,11 +246,6 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
@@ -278,37 +258,25 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    addi a3, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 4
+; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v5
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB10_2
@@ -317,18 +285,18 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:  .LBB10_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -336,14 +304,14 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmerge.vvm v24, v24, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
@@ -351,14 +319,14 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -387,19 +355,14 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    mul a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmset.m v24
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v7, v24, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
@@ -410,32 +373,29 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
@@ -642,13 +602,13 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v11, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v11
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -712,13 +672,13 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v13, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -775,12 +735,6 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
@@ -789,23 +743,14 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -844,32 +789,17 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfmax_vv_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    addi sp, sp, -16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 16
-; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 3
-; ZVFH-NEXT:    sub sp, sp, a1
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v7, v0
-; ZVFH-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v6
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add sp, sp, a0
-; ZVFH-NEXT:    .cfi_def_cfa sp, 16
-; ZVFH-NEXT:    addi sp, sp, 16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16:
@@ -883,11 +813,6 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
@@ -900,37 +825,25 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi a3, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v5
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v5
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
@@ -939,18 +852,18 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:  .LBB22_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -958,14 +871,14 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmerge.vvm v24, v24, v16, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
@@ -973,14 +886,14 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -1020,19 +933,14 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmset.m v24
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v7, v24, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
@@ -1043,32 +951,29 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv8r.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
@@ -1348,32 +1253,17 @@ define <vscale x 4 x double> @vfmax_vv_nxv4f64_unmasked(<vscale x 4 x double> %v
 define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1400,14 +1290,13 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -1417,37 +1306,27 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    vslidedown.vx v6, v0, a4
 ; CHECK-NEXT:    sub a4, a2, a1
 ; CHECK-NEXT:    add a3, a0, a3
-; CHECK-NEXT:    vl8re64.v v8, (a3)
+; CHECK-NEXT:    vl8re64.v v24, (a3)
 ; CHECK-NEXT:    sltu a3, a2, a4
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a3, a3, a4
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 3
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vmv8r.v v24, v16
+; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; CHECK-NEXT:    vl8re64.v v16, (a0)
 ; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB40_2
 ; CHECK-NEXT:  # %bb.1:
@@ -1455,31 +1334,24 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:  .LBB40_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16

diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index 86ed239e99373..27816a862cc1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -110,13 +110,13 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v14, v14, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v16, v14, v12, v0
+; CHECK-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v14, v0
+; CHECK-NEXT:    vmfeq.vv v11, v12, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v11
+; CHECK-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmin.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vfmin.vv v12, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -155,13 +155,13 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v20, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v24, v20, v16, v0
+; CHECK-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v20, v0
+; CHECK-NEXT:    vmfeq.vv v13, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfmin.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -193,12 +193,6 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
@@ -207,23 +201,14 @@ define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    vmfeq.vv v6, v24, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -261,11 +246,6 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
@@ -278,37 +258,25 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    addi a3, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 4
+; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v5
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB10_2
@@ -317,18 +285,18 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:  .LBB10_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -336,14 +304,14 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmerge.vvm v24, v24, v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
@@ -351,14 +319,14 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -387,19 +355,14 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    mul a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmset.m v24
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v7, v24, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
@@ -410,32 +373,29 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
@@ -642,13 +602,13 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v11, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v11
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -712,13 +672,13 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v13, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -775,12 +735,6 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
@@ -789,23 +743,14 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -844,32 +789,17 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfmin_vv_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    addi sp, sp, -16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 16
-; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 3
-; ZVFH-NEXT:    sub sp, sp, a1
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v7, v0
-; ZVFH-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v6
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add sp, sp, a0
-; ZVFH-NEXT:    .cfi_def_cfa sp, 16
-; ZVFH-NEXT:    addi sp, sp, 16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16:
@@ -883,11 +813,6 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
@@ -900,37 +825,25 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi a3, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v5
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v5
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
@@ -939,18 +852,18 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:  .LBB22_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -958,14 +871,14 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmerge.vvm v24, v24, v16, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
@@ -973,14 +886,14 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -1020,19 +933,14 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmset.m v24
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v7, v24, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
@@ -1043,32 +951,29 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv8r.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
@@ -1348,32 +1253,17 @@ define <vscale x 4 x double> @vfmin_vv_nxv4f64_unmasked(<vscale x 4 x double> %v
 define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1400,14 +1290,13 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -1417,37 +1306,27 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    vslidedown.vx v6, v0, a4
 ; CHECK-NEXT:    sub a4, a2, a1
 ; CHECK-NEXT:    add a3, a0, a3
-; CHECK-NEXT:    vl8re64.v v8, (a3)
+; CHECK-NEXT:    vl8re64.v v24, (a3)
 ; CHECK-NEXT:    sltu a3, a2, a4
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a3, a3, a4
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 3
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vmv8r.v v24, v16
+; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; CHECK-NEXT:    vl8re64.v v16, (a0)
 ; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB40_2
 ; CHECK-NEXT:  # %bb.1:
@@ -1455,31 +1334,24 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:  .LBB40_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v24, v8, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16

diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index 67e7f7c7fbd42..3a4591d47e5e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -120,23 +120,23 @@ define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16(<vscale x 4 x bfloat> %va, <
 ; CHECK-LABEL: vp_nearbyint_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    fsflags a0
@@ -172,23 +172,23 @@ define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16(<vscale x 8 x bfloat> %va, <
 ; CHECK-LABEL: vp_nearbyint_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    fsflags a0
@@ -224,23 +224,23 @@ define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16(<vscale x 16 x bfloat> %va
 ; CHECK-LABEL: vp_nearbyint_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    fsflags a0
@@ -585,23 +585,23 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
@@ -653,43 +653,43 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal
 ; ZVFH-LABEL: vp_nearbyint_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
@@ -741,43 +741,43 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs
 ; ZVFH-LABEL: vp_nearbyint_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsflags a0
@@ -829,20 +829,20 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFH-LABEL: vp_nearbyint_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    ret
 ;
@@ -1064,19 +1064,19 @@ define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vsc
 ; CHECK-LABEL: vp_nearbyint_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1106,19 +1106,19 @@ define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vsc
 ; CHECK-LABEL: vp_nearbyint_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1148,19 +1148,19 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, <
 ; CHECK-LABEL: vp_nearbyint_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -1330,78 +1330,78 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_nearbyint_nxv2f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    frflags a0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsflags a0
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_nearbyint_nxv2f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    frflags a0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsflags a0
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv2f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    frflags a0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsflags a0
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv2f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    frflags a0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsflags a0
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -1478,78 +1478,78 @@ define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_nearbyint_nxv4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    frflags a0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsflags a0
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_nearbyint_nxv4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    frflags a0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsflags a0
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    frflags a0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsflags a0
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    frflags a0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsflags a0
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1626,78 +1626,78 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_nearbyint_nxv7f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    frflags a0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsflags a0
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_nearbyint_nxv7f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    frflags a0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsflags a0
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv7f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    frflags a0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsflags a0
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv7f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    frflags a0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsflags a0
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1774,78 +1774,78 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_nearbyint_nxv8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    frflags a0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsflags a0
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_nearbyint_nxv8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    frflags a0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsflags a0
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_nearbyint_nxv8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    frflags a0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsflags a0
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_nearbyint_nxv8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    frflags a0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsflags a0
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/pr165232.ll b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll
index bef53c6a5ae62..95f54bc29511b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr165232.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll
@@ -17,164 +17,153 @@ define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element
 ; CHECK-NEXT:    sub sp, sp, t0
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 2
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    csrr t0, vlenb
+; CHECK-NEXT:    slli t0, t0, 2
+; CHECK-NEXT:    add a1, a1, t0
 ; CHECK-NEXT:    vs4r.v v16, (a1) # vscale x 32-byte Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t0, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t0
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t0, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t0
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    ld t0, 56(a1)
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t1, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t1
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    ld t1, 48(a1)
 ; CHECK-NEXT:    vsetvli t2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v9, 0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t2, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t2
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    ld t2, 40(a1)
 ; CHECK-NEXT:    # kill: def $v10 killed $v9 killed $vtype
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t3, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    ld t3, 32(a1)
 ; CHECK-NEXT:    vmv.v.i v11, 0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t4, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    ld t4, 16(a1)
 ; CHECK-NEXT:    vmv.v.i v12, 0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv t5, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, t5
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    ld t5, 24(a1)
 ; CHECK-NEXT:    vmv.v.i v13, 0
 ; CHECK-NEXT:    vsetvli t6, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v22, 0
+; CHECK-NEXT:    vmv.v.i v16, 0
 ; CHECK-NEXT:    vmv1r.v v14, v9
 ; CHECK-NEXT:    sd zero, 0(a0)
-; CHECK-NEXT:    vmv.v.i v24, 0
+; CHECK-NEXT:    vmv.v.i v18, 0
 ; CHECK-NEXT:    vmv1r.v v15, v9
-; CHECK-NEXT:    vmv1r.v v18, v9
+; CHECK-NEXT:    vmv1r.v v28, v9
 ; CHECK-NEXT:    li t6, 1023
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vmv1r.v v19, v9
+; CHECK-NEXT:    vmv.v.i v20, 0
+; CHECK-NEXT:    vmv1r.v v29, v9
 ; CHECK-NEXT:    slli t6, t6, 52
-; CHECK-NEXT:    vmv.v.i v28, 0
+; CHECK-NEXT:    vmv.v.i v22, 0
 ; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs2r.v v22, (a1) # vscale x 16-byte Folded Spill
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    vs4r.v v24, (a1) # vscale x 32-byte Folded Spill
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    vs2r.v v28, (a1) # vscale x 16-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    vmv1r.v v20, v9
+; CHECK-NEXT:    vmv1r.v v30, v9
 ; CHECK-NEXT:    sd t6, 0(t5)
-; CHECK-NEXT:    vmv2r.v v16, v14
-; CHECK-NEXT:    vmv2r.v v14, v12
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv1r.v v11, v9
 ; CHECK-NEXT:    vmv1r.v v21, v9
+; CHECK-NEXT:    vmv2r.v v22, v10
+; CHECK-NEXT:    vmv4r.v v24, v12
+; CHECK-NEXT:    vmv1r.v v31, v9
 ; CHECK-NEXT:    csrr t5, vlenb
 ; CHECK-NEXT:    slli t5, t5, 3
 ; CHECK-NEXT:    add t5, sp, t5
 ; CHECK-NEXT:    addi t5, t5, 16
-; CHECK-NEXT:    vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
-; CHECK-NEXT:    csrr t6, vlenb
-; CHECK-NEXT:    slli t6, t6, 1
-; CHECK-NEXT:    add t5, t5, t6
-; CHECK-NEXT:    vs2r.v v20, (t5) # vscale x 16-byte Folded Spill
+; CHECK-NEXT:    vs4r.v v28, (t5) # vscale x 32-byte Folded Spill
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v19, 0
-; CHECK-NEXT:    vmclr.m v10
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v6, 0
+; CHECK-NEXT:    vmv.v.i v13, 0
+; CHECK-NEXT:    vmclr.m v14
 ; CHECK-NEXT:  .LBB0_1: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmv1r.v v20, v19
-; CHECK-NEXT:    vmv1r.v v3, v19
-; CHECK-NEXT:    vmv1r.v v5, v19
-; CHECK-NEXT:    vmv1r.v v2, v19
-; CHECK-NEXT:    vmv1r.v v31, v19
-; CHECK-NEXT:    vmv1r.v v30, v19
-; CHECK-NEXT:    vmv1r.v v4, v19
-; CHECK-NEXT:    vmv2r.v v22, v10
-; CHECK-NEXT:    vmv4r.v v24, v12
-; CHECK-NEXT:    vmv2r.v v28, v16
-; CHECK-NEXT:    vmv2r.v v8, v6
-; CHECK-NEXT:    vmv1r.v v18, v19
-; CHECK-NEXT:    vmv1r.v v21, v10
+; CHECK-NEXT:    vsetivli zero, 0, e64, m2, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v14
+; CHECK-NEXT:    vmv1r.v v17, v13
+; CHECK-NEXT:    vmv1r.v v15, v13
+; CHECK-NEXT:    vmv1r.v v9, v13
+; CHECK-NEXT:    vmv1r.v v16, v13
+; CHECK-NEXT:    vmv1r.v v19, v13
+; CHECK-NEXT:    vmv1r.v v28, v13
+; CHECK-NEXT:    vmv1r.v v12, v13
+; CHECK-NEXT:    vmv1r.v v2, v21
+; CHECK-NEXT:    vmv1r.v v4, v23
+; CHECK-NEXT:    vmv1r.v v5, v24
+; CHECK-NEXT:    vmv1r.v v6, v25
+; CHECK-NEXT:    vmv1r.v v7, v26
+; CHECK-NEXT:    vmv1r.v v8, v27
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    vmv.v.i v30, 0
+; CHECK-NEXT:    vmv1r.v v29, v13
+; CHECK-NEXT:    vmv1r.v v18, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; CHECK-NEXT:    vle32.v v20, (t4)
-; CHECK-NEXT:    vle32.v v3, (t1)
-; CHECK-NEXT:    vle32.v v30, (a7)
-; CHECK-NEXT:    vle64.v v8, (a4)
-; CHECK-NEXT:    vle32.v v5, (t2)
-; CHECK-NEXT:    vle32.v v2, (t3)
-; CHECK-NEXT:    vle32.v v31, (a6)
-; CHECK-NEXT:    vmv1r.v v24, v30
+; CHECK-NEXT:    vle32.v v17, (t4)
+; CHECK-NEXT:    vle32.v v15, (t1)
+; CHECK-NEXT:    vle32.v v28, (a7)
+; CHECK-NEXT:    vle64.v v30, (a4)
+; CHECK-NEXT:    vle32.v v9, (t2)
+; CHECK-NEXT:    vle32.v v16, (t3)
+; CHECK-NEXT:    vle32.v v19, (a6)
+; CHECK-NEXT:    vmv1r.v v3, v28
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vv v21, v8, v6, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v19
+; CHECK-NEXT:    vmflt.vv v18, v30, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v19, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
-; CHECK-NEXT:    vle32.v v18, (a2)
-; CHECK-NEXT:    vle32.v v8, (a3)
-; CHECK-NEXT:    vle32.v v4, (a5)
-; CHECK-NEXT:    vmv1r.v v22, v20
+; CHECK-NEXT:    vle32.v v29, (a2)
+; CHECK-NEXT:    vle32.v v19, (a3)
+; CHECK-NEXT:    vle32.v v12, (a5)
+; CHECK-NEXT:    vmv1r.v v1, v17
 ; CHECK-NEXT:    csrr t5, vlenb
 ; CHECK-NEXT:    slli t5, t5, 3
 ; CHECK-NEXT:    add t5, sp, t5
 ; CHECK-NEXT:    addi t5, t5, 16
-; CHECK-NEXT:    vl1r.v v1, (t5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vl1r.v v9, (t5) # vscale x 8-byte Folded Reload
 ; CHECK-NEXT:    csrr t6, vlenb
 ; CHECK-NEXT:    add t5, t5, t6
-; CHECK-NEXT:    vl2r.v v2, (t5) # vscale x 16-byte Folded Reload
+; CHECK-NEXT:    vl2r.v v10, (t5) # vscale x 16-byte Folded Reload
 ; CHECK-NEXT:    slli t6, t6, 1
 ; CHECK-NEXT:    add t5, t5, t6
-; CHECK-NEXT:    vl1r.v v4, (t5) # vscale x 8-byte Folded Reload
-; CHECK-NEXT:    vsseg4e32.v v1, (zero)
-; CHECK-NEXT:    vsseg8e32.v v22, (a1)
-; CHECK-NEXT:    vmv1r.v v0, v21
-; CHECK-NEXT:    vssub.vv v8, v19, v18, v0.t
+; CHECK-NEXT:    vl1r.v v12, (t5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vsseg4e32.v v9, (zero)
+; CHECK-NEXT:    vsseg8e32.v v1, (a1)
+; CHECK-NEXT:    vmv1r.v v0, v18
+; CHECK-NEXT:    vssub.vv v19, v13, v29, v0.t
 ; CHECK-NEXT:    csrr t5, vlenb
 ; CHECK-NEXT:    slli t5, t5, 2
 ; CHECK-NEXT:    mv t6, t5
@@ -182,31 +171,30 @@ define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element
 ; CHECK-NEXT:    add t5, t5, t6
 ; CHECK-NEXT:    add t5, sp, t5
 ; CHECK-NEXT:    addi t5, t5, 16
-; CHECK-NEXT:    vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vl4r.v v28, (t5) # vscale x 32-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, t0, e64, m2, ta, ma
-; CHECK-NEXT:    vsseg2e64.v v20, (zero)
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vsseg2e64.v v28, (zero)
+; CHECK-NEXT:    vmv1r.v v0, v14
 ; CHECK-NEXT:    addi t5, sp, 16
-; CHECK-NEXT:    vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vl2r.v v2, (t5) # vscale x 16-byte Folded Reload
 ; CHECK-NEXT:    csrr t6, vlenb
-; CHECK-NEXT:    slli t6, t6, 2
+; CHECK-NEXT:    slli t6, t6, 1
+; CHECK-NEXT:    add t5, t5, t6
+; CHECK-NEXT:    vl4r.v v4, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    slli t6, t6, 1
 ; CHECK-NEXT:    add t5, t5, t6
-; CHECK-NEXT:    vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vl2r.v v8, (t5) # vscale x 16-byte Folded Reload
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m2, ta, ma
-; CHECK-NEXT:    vsseg4e64.v v20, (zero), v0.t
+; CHECK-NEXT:    vsseg4e64.v v2, (zero), v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsseg8e32.v v8, (a0)
+; CHECK-NEXT:    vsseg8e32.v v19, (a0)
 ; CHECK-NEXT:    csrr t5, vlenb
 ; CHECK-NEXT:    slli t5, t5, 4
 ; CHECK-NEXT:    add t5, sp, t5
 ; CHECK-NEXT:    addi t5, t5, 16
-; CHECK-NEXT:    vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
-; CHECK-NEXT:    csrr t6, vlenb
-; CHECK-NEXT:    slli t6, t6, 2
-; CHECK-NEXT:    add t5, t5, t6
-; CHECK-NEXT:    vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v0, (t5) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsseg4e64.v v20, (zero)
+; CHECK-NEXT:    vsseg4e64.v v0, (zero)
 ; CHECK-NEXT:    j .LBB0_1
 entry:
   store double 0.000000e+00, ptr %var_117, align 8

diff  --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index 380287dd555c9..82235f08b5226 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -112,22 +112,22 @@ define <vscale x 4 x bfloat> @vp_rint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscal
 ; CHECK-LABEL: vp_rint_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
@@ -160,22 +160,22 @@ define <vscale x 8 x bfloat> @vp_rint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK-LABEL: vp_rint_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -208,22 +208,22 @@ define <vscale x 16 x bfloat> @vp_rint_nxv16bf16(<vscale x 16 x bfloat> %va, <vs
 ; CHECK-LABEL: vp_rint_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -539,22 +539,22 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4
 ; ZVFHMIN-LABEL: vp_rint_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -601,40 +601,40 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8
 ; ZVFH-LABEL: vp_rint_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -681,40 +681,40 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH-LABEL: vp_rint_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -761,19 +761,19 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-LABEL: vp_rint_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv32f16:
@@ -976,18 +976,18 @@ define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x
 ; CHECK-LABEL: vp_rint_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1014,18 +1014,18 @@ define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x
 ; CHECK-LABEL: vp_rint_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1052,18 +1052,18 @@ define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscal
 ; CHECK-LABEL: vp_rint_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1214,71 +1214,71 @@ define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; RV32ZVFH-LABEL: vp_rint_nxv2f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_rint_nxv2f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFMIN-LABEL: vp_rint_nxv2f64:
 ; RV32ZVFMIN:       # %bb.0:
 ; RV32ZVFMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFMIN-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFMIN-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; RV32ZVFMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; RV32ZVFMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_rint_nxv2f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1346,71 +1346,71 @@ define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; RV32ZVFH-LABEL: vp_rint_nxv4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_rint_nxv4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFMIN-LABEL: vp_rint_nxv4f64:
 ; RV32ZVFMIN:       # %bb.0:
 ; RV32ZVFMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFMIN-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFMIN-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; RV32ZVFMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; RV32ZVFMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_rint_nxv4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1478,71 +1478,71 @@ define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale
 ; RV32ZVFH-LABEL: vp_rint_nxv7f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_rint_nxv7f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFMIN-LABEL: vp_rint_nxv7f64:
 ; RV32ZVFMIN:       # %bb.0:
 ; RV32ZVFMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFMIN-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFMIN-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV32ZVFMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32ZVFMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_rint_nxv7f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1610,71 +1610,71 @@ define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; RV32ZVFH-LABEL: vp_rint_nxv8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_rint_nxv8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFMIN-LABEL: vp_rint_nxv8f64:
 ; RV32ZVFMIN:       # %bb.0:
 ; RV32ZVFMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFMIN-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFMIN-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV32ZVFMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV32ZVFMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_rint_nxv8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index 37c036d38148a..eae28fafb4300 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -120,24 +120,24 @@ define <vscale x 4 x bfloat> @vp_round_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-LABEL: vp_round_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
@@ -172,24 +172,24 @@ define <vscale x 8 x bfloat> @vp_round_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-LABEL: vp_round_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -224,24 +224,24 @@ define <vscale x 16 x bfloat> @vp_round_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-LABEL: vp_round_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -585,24 +585,24 @@ define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-LABEL: vp_round_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -653,44 +653,44 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: vp_round_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -741,44 +741,44 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH-LABEL: vp_round_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -829,21 +829,21 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFH-LABEL: vp_round_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv32f16:
@@ -1064,20 +1064,20 @@ define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: vp_round_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1106,20 +1106,20 @@ define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: vp_round_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1148,20 +1148,20 @@ define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: vp_round_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1330,79 +1330,79 @@ define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_round_nxv2f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_nxv2f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_nxv2f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_nxv2f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1478,79 +1478,79 @@ define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_round_nxv4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_nxv4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_nxv4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_nxv4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1626,79 +1626,79 @@ define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_round_nxv7f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_nxv7f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_nxv7f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_nxv7f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1774,79 +1774,79 @@ define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; RV32ZVFH-LABEL: vp_round_nxv8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 4
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_round_nxv8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 4
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_round_nxv8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_round_nxv8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 4
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index 37a9ec1c0a8aa..b9c435b7fb8d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -120,24 +120,24 @@ define <vscale x 4 x bfloat> @vp_roundeven_nxv4bf16(<vscale x 4 x bfloat> %va, <
 ; CHECK-LABEL: vp_roundeven_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
@@ -172,24 +172,24 @@ define <vscale x 8 x bfloat> @vp_roundeven_nxv8bf16(<vscale x 8 x bfloat> %va, <
 ; CHECK-LABEL: vp_roundeven_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -224,24 +224,24 @@ define <vscale x 16 x bfloat> @vp_roundeven_nxv16bf16(<vscale x 16 x bfloat> %va
 ; CHECK-LABEL: vp_roundeven_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -585,24 +585,24 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal
 ; ZVFHMIN-LABEL: vp_roundeven_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -653,44 +653,44 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal
 ; ZVFH-LABEL: vp_roundeven_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -741,44 +741,44 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs
 ; ZVFH-LABEL: vp_roundeven_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -829,21 +829,21 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs
 ; ZVFH-LABEL: vp_roundeven_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv32f16:
@@ -1064,20 +1064,20 @@ define <vscale x 4 x float> @vp_roundeven_nxv4f32(<vscale x 4 x float> %va, <vsc
 ; CHECK-LABEL: vp_roundeven_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1106,20 +1106,20 @@ define <vscale x 8 x float> @vp_roundeven_nxv8f32(<vscale x 8 x float> %va, <vsc
 ; CHECK-LABEL: vp_roundeven_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1148,20 +1148,20 @@ define <vscale x 16 x float> @vp_roundeven_nxv16f32(<vscale x 16 x float> %va, <
 ; CHECK-LABEL: vp_roundeven_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1330,79 +1330,79 @@ define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_roundeven_nxv2f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_nxv2f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_nxv2f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_nxv2f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1478,79 +1478,79 @@ define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_roundeven_nxv4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_nxv4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_nxv4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_nxv4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1626,79 +1626,79 @@ define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_roundeven_nxv7f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_nxv7f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_nxv7f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_nxv7f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1774,79 +1774,79 @@ define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <v
 ; RV32ZVFH-LABEL: vp_roundeven_nxv8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 0
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundeven_nxv8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 0
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundeven_nxv8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundeven_nxv8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 0
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index 5553b988fec97..7f4aaff7f8635 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -120,24 +120,24 @@ define <vscale x 4 x bfloat> @vp_roundtozero_nxv4bf16(<vscale x 4 x bfloat> %va,
 ; CHECK-LABEL: vp_roundtozero_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vmv1r.v v12, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfabs.v v12, v10, v0.t
+; CHECK-NEXT:    vfabs.v v8, v10, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
@@ -172,24 +172,24 @@ define <vscale x 8 x bfloat> @vp_roundtozero_nxv8bf16(<vscale x 8 x bfloat> %va,
 ; CHECK-LABEL: vp_roundtozero_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmv1r.v v16, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v17, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v12, v0.t
+; CHECK-NEXT:    vfabs.v v8, v12, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT:    ret
@@ -224,24 +224,24 @@ define <vscale x 16 x bfloat> @vp_roundtozero_nxv16bf16(<vscale x 16 x bfloat> %
 ; CHECK-LABEL: vp_roundtozero_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmv1r.v v25, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v24, v16, v0.t
+; CHECK-NEXT:    vfabs.v v8, v16, v0.t
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    ret
@@ -585,24 +585,24 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
+; ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v13, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v13, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v9
+; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -653,44 +653,44 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc
 ; ZVFH-LABEL: vp_roundtozero_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v12, v0
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v17, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -741,44 +741,44 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <
 ; ZVFH-LABEL: vp_roundtozero_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v16, v0
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    lui a0, 307200
-; ZVFHMIN-NEXT:    vmv1r.v v8, v0
+; ZVFHMIN-NEXT:    vmv1r.v v25, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v8, v24, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v25, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a0, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmv1r.v v0, v25
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    fsrm a0
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -829,21 +829,21 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <
 ; ZVFH-LABEL: vp_roundtozero_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vmv1r.v v24, v0
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    li a0, 25
 ; ZVFH-NEXT:    slli a0, a0, 10
 ; ZVFH-NEXT:    fmv.h.x fa5, a0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmv1r.v v0, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16:
@@ -1064,20 +1064,20 @@ define <vscale x 4 x float> @vp_roundtozero_nxv4f32(<vscale x 4 x float> %va, <v
 ; CHECK-LABEL: vp_roundtozero_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1106,20 +1106,20 @@ define <vscale x 8 x float> @vp_roundtozero_nxv8f32(<vscale x 8 x float> %va, <v
 ; CHECK-LABEL: vp_roundtozero_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1148,20 +1148,20 @@ define <vscale x 16 x float> @vp_roundtozero_nxv16f32(<vscale x 16 x float> %va,
 ; CHECK-LABEL: vp_roundtozero_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1330,79 +1330,79 @@ define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va,
 ; RV32ZVFH-LABEL: vp_roundtozero_nxv2f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v10, v0
+; RV32ZVFH-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v10
+; RV32ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_nxv2f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v10, v0
-; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v12, v0
+; RV64ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v10
+; RV64ZVFH-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv2f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v10, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv2f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v10, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v10, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v10
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1478,79 +1478,79 @@ define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va,
 ; RV32ZVFH-LABEL: vp_roundtozero_nxv4f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v12, v0
+; RV32ZVFH-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v12
+; RV32ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_nxv4f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v12, v0
-; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v16, v0
+; RV64ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v12
+; RV64ZVFH-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv4f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v12, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI38_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv4f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v12, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v12, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v12
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1626,79 +1626,79 @@ define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va,
 ; RV32ZVFH-LABEL: vp_roundtozero_nxv7f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_nxv7f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv7f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI40_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv7f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1774,79 +1774,79 @@ define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va,
 ; RV32ZVFH-LABEL: vp_roundtozero_nxv8f64:
 ; RV32ZVFH:       # %bb.0:
 ; RV32ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vmv1r.v v16, v0
+; RV32ZVFH-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFH-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFH-NEXT:    fsrmi a0, 1
-; RV32ZVFH-NEXT:    vmv1r.v v0, v16
+; RV32ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFH-NEXT:    fsrm a0
-; RV32ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFH-NEXT:    ret
 ;
 ; RV64ZVFH-LABEL: vp_roundtozero_nxv8f64:
 ; RV64ZVFH:       # %bb.0:
 ; RV64ZVFH-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vmv1r.v v16, v0
-; RV64ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vmv1r.v v24, v0
+; RV64ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    li a0, 1075
 ; RV64ZVFH-NEXT:    slli a0, a0, 52
 ; RV64ZVFH-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFH-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFH-NEXT:    fsrmi a0, 1
-; RV64ZVFH-NEXT:    vmv1r.v v0, v16
+; RV64ZVFH-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFH-NEXT:    fsrm a0
-; RV64ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFH-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFH-NEXT:    ret
 ;
 ; RV32ZVFHMIN-LABEL: vp_roundtozero_nxv8f64:
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv1r.v v16, v0
+; RV32ZVFHMIN-NEXT:    vmv1r.v v24, v0
 ; RV32ZVFHMIN-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32ZVFHMIN-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; RV32ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV32ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV32ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    fsrm a0
-; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV32ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV32ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vp_roundtozero_nxv8f64:
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv1r.v v16, v0
-; RV64ZVFHMIN-NEXT:    vfabs.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vmv1r.v v24, v0
+; RV64ZVFHMIN-NEXT:    vfabs.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    li a0, 1075
 ; RV64ZVFHMIN-NEXT:    slli a0, a0, 52
 ; RV64ZVFHMIN-NEXT:    fmv.d.x fa5, a0
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; RV64ZVFHMIN-NEXT:    vmflt.vf v24, v16, fa5, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrmi a0, 1
-; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v24
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    fsrm a0
-; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; RV64ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; RV64ZVFHMIN-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; RV64ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 634e58198def3..9e0842c83f9ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1485,10 +1485,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v9, v7
+; CHECK-NEXT:    vmfeq.vv v9, v16, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v9, v5, a3
+; CHECK-NEXT:    vslideup.vx v8, v5, a3
 ; CHECK-NEXT:    bltu a2, a4, .LBB85_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    mv a2, a4
@@ -1523,12 +1523,12 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vmfeq.vv v10, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v10, a3
+; CHECK-NEXT:    vslideup.vx v10, v9, a3
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
-; CHECK-NEXT:    vmv.v.v v0, v8
+; CHECK-NEXT:    vslideup.vx v10, v8, a1
+; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
@@ -3673,10 +3673,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v9, v7
+; ZVFHMIN-NEXT:    vmfeq.vv v9, v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v8, v7
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v9, v5, a3
+; ZVFHMIN-NEXT:    vslideup.vx v8, v5, a3
 ; ZVFHMIN-NEXT:    bltu a2, a4, .LBB171_6
 ; ZVFHMIN-NEXT:  # %bb.5:
 ; ZVFHMIN-NEXT:    mv a2, a4
@@ -3711,12 +3711,12 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v10, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v8, v10, a3
+; ZVFHMIN-NEXT:    vslideup.vx v10, v9, a3
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a1
-; ZVFHMIN-NEXT:    vmv.v.v v0, v8
+; ZVFHMIN-NEXT:    vslideup.vx v10, v8, a1
+; ZVFHMIN-NEXT:    vmv.v.v v0, v10
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 1

diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index c1de57bf850ac..ceeeba466014c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -1852,9 +1852,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_uge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsleu.vv v16, v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2030,9 +2030,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2162,9 +2162,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -3069,9 +3069,9 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmseq.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmseq.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3097,9 +3097,9 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmseq.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmseq.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3158,9 +3158,9 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsne.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsne.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3186,9 +3186,9 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsne.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsne.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3247,9 +3247,9 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3275,9 +3275,9 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3336,9 +3336,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsleu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsleu.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3346,9 +3346,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_uge_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vmsleu.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmsleu.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3365,9 +3365,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsleu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsleu.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3426,9 +3426,9 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3454,9 +3454,9 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3515,9 +3515,9 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3543,9 +3543,9 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3604,9 +3604,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3614,9 +3614,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_sge_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmsle.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3633,9 +3633,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3694,9 +3694,9 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3722,9 +3722,9 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3783,9 +3783,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3811,9 +3811,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3821,9 +3821,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmsle.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer

diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
index 7924bc83d824c..8b150e4c8d215 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
@@ -96,9 +96,9 @@ define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i1> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
index dba3f6cec2da0..3df033384c88e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
@@ -96,9 +96,9 @@ define <vscale x 2 x i1> @vfptoui_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptoui_nxv2i1_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i1> %v

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 81a2388421cee..a91584b11fa63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1280,10 +1280,10 @@ name: vmop_vv_consumer_incompatible_eew
 body: |
   bb.0:
     ; CHECK-LABEL: name: vmop_vv_consumer_incompatible_eew
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVMSEQ_VV_M1 $noreg, %x, 1, 4 /* e16 */
     ; CHECK-NEXT: $v8 = COPY %y
-    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %x:vrnov0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
     %y:vr = PseudoVMSEQ_VV_M1 $noreg, %x, 1, 4 /* e16 */
     $v8 = COPY %y
 ...

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index 21a4143f323ee..44cccb7a3c321 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -2443,10 +2443,10 @@ define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v10, v0
-; RV32-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    vmsle.vv v12, v10, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -2475,10 +2475,10 @@ define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v12, v0
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    vmsle.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index 4795e86983089..7a22dff27c44c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -2423,10 +2423,10 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v10, v0
-; RV32-NEXT:    vmsleu.vv v10, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    vmsleu.vv v12, v10, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -2455,10 +2455,10 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v12, v0
-; RV32-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmv1r.v v16, v0
+; RV32-NEXT:    vmsleu.vv v16, v12, v8, v0.t
+; RV32-NEXT:    vmv1r.v v0, v16
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
index 4b86cc771d617..c1adb7d70e434 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
@@ -50,9 +50,9 @@ define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x
 ; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vand.vi v10, v8, 1, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i1> %v


        

